401 files changed, 19757 insertions, 2490 deletions
diff --git a/.github/workflows/containers/github-action-ci-windows/Dockerfile b/.github/workflows/containers/github-action-ci-windows/Dockerfile index 640d34d..9ddf501 100644 --- a/.github/workflows/containers/github-action-ci-windows/Dockerfile +++ b/.github/workflows/containers/github-action-ci-windows/Dockerfile @@ -90,7 +90,7 @@ RUN powershell -Command \ RUN git config --system core.longpaths true & \ git config --global core.autocrlf false -ARG RUNNER_VERSION=2.328.0 +ARG RUNNER_VERSION=2.329.0 ENV RUNNER_VERSION=$RUNNER_VERSION RUN powershell -Command \ diff --git a/.github/workflows/containers/github-action-ci/Dockerfile b/.github/workflows/containers/github-action-ci/Dockerfile index 1d3f5f9..1b376dd 100644 --- a/.github/workflows/containers/github-action-ci/Dockerfile +++ b/.github/workflows/containers/github-action-ci/Dockerfile @@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:24.04 AS base ENV LLVM_SYSROOT=/opt/llvm FROM base AS stage1-toolchain -ENV LLVM_VERSION=21.1.1 +ENV LLVM_VERSION=21.1.3 RUN apt-get update && \ apt-get install -y \ @@ -99,7 +99,7 @@ WORKDIR /home/gha FROM ci-container AS ci-container-agent -ENV GITHUB_RUNNER_VERSION=2.328.0 +ENV GITHUB_RUNNER_VERSION=2.329.0 RUN mkdir actions-runner && \ cd actions-runner && \ diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 1e0dc70..2b85d8b 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -12,6 +12,8 @@ on: jobs: code_formatter: runs-on: ubuntu-24.04 + container: + image: 'ghcr.io/llvm/ci-ubuntu-24.04-format' timeout-minutes: 30 concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number }} @@ -23,6 +25,14 @@ jobs: with: fetch-depth: 2 + # We need to set the repo checkout as safe, otherwise tj-actions/changed-files + # will fail due to the changed ownership inside the container. + # TODO(boomanaiden154): We should probably fix this by having the default user + # in the container have the same ID as the GHA user on the host. + - name: Set Safe Directory + run: | + chown -R root $(pwd) + - name: Get changed files id: changed-files uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5 @@ -39,24 +49,6 @@ jobs: echo "Formatting files:" echo "$CHANGED_FILES" - # The clang format version should always be upgraded to the first version - # of a release cycle (x.1.0) or the last version of a release cycle, or - # if there have been relevant clang-format backports. - - name: Install clang-format - uses: aminya/setup-cpp@a276e6e3d1db9160db5edc458e99a30d3b109949 # v1.7.1 - with: - clangformat: 21.1.0 - - - name: Setup Python env - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.13' - cache: 'pip' - cache-dependency-path: 'llvm/utils/git/requirements_formatting.txt' - - - name: Install python dependencies - run: pip install -r llvm/utils/git/requirements_formatting.txt - - name: Run code formatter env: GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }} diff --git a/bolt/docs/BinaryAnalysis.md b/bolt/docs/BinaryAnalysis.md index b13410c..07f096e 100644 --- a/bolt/docs/BinaryAnalysis.md +++ b/bolt/docs/BinaryAnalysis.md @@ -1,7 +1,7 @@ # BOLT-based binary analysis As part of post-link-time optimizing, BOLT needs to perform a range of analyses -on binaries such as recontructing control flow graphs, and more. +on binaries such as reconstructing control flow graphs, and more. 
The `llvm-bolt-binary-analysis` tool enables running requested binary analyses on binaries, and generating reports. It does this by building on top of the diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 151399d..43cecee 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -375,7 +375,7 @@ - `--use-old-text` - Re-use space in old .text if possible (relocation mode) + Reuse space in old .text if possible (relocation mode) - `-v <uint>` diff --git a/bolt/docs/RuntimeLibrary.md b/bolt/docs/RuntimeLibrary.md index 58d9497..b969ebd 100644 --- a/bolt/docs/RuntimeLibrary.md +++ b/bolt/docs/RuntimeLibrary.md @@ -15,7 +15,7 @@ However, this approach quickly becomes awkward if we want to insert a lot of cod Currently, our runtime library is written in C++ and contains code that helps us instrument a binary. ### Limitations -Our library is not written with regular C++ code as it is not linked against any other libraries (this means we cannnot rely on anything defined on libstdc++, glibc, libgcc etc), but is self sufficient. In runtime/CMakeLists.txt, we can see it is built with -ffreestanding, which requires the compiler to avoid using a runtime library by itself. +Our library is not written with regular C++ code as it is not linked against any other libraries (this means we cannot rely on anything defined on libstdc++, glibc, libgcc etc), but is self sufficient. In runtime/CMakeLists.txt, we can see it is built with -ffreestanding, which requires the compiler to avoid using a runtime library by itself. While this requires us to make our own syscalls, it does simplify our linker a lot, which is very limited and can only do basic function name resolving. However, this is a big improvement in comparison with programmatically generating the code in assembly language using MCInsts. diff --git a/bolt/docs/doxygen.cfg.in b/bolt/docs/doxygen.cfg.in index 538285f..de8b1f7b 100644 --- a/bolt/docs/doxygen.cfg.in +++ b/bolt/docs/doxygen.cfg.in @@ -1070,7 +1070,7 @@ HTML_STYLESHEET = # defined cascading style sheet that is included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefor more robust against future updates. +# standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet file to the output directory. For an example # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index f5e9887..7b10b2d 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1336,7 +1336,7 @@ public: ColdCodeSectionName = Name.str(); } - /// Return true iif the function will halt execution on entry. + /// Return true if the function will halt execution on entry. bool trapsOnEntry() const { return TrapsOnEntry; } /// Make the function always trap on entry. Other than the trap instruction, diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index 4c3c277..95e958f 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -60,7 +60,7 @@ public: uint32_t UnitLength = 0; bool IsConstructed = false; // A map of DIE offsets in original DWARF section to DIE ID. 
- // Whih is used to access DieInfoVector. + // Which is used to access DieInfoVector. std::unordered_map<uint64_t, uint32_t> DIEIDMap; // Some STL implementations don't have a noexcept move constructor for diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 8149789..7c8ea12ee 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -326,8 +326,8 @@ public: /// Write out entries in to .debug_addr section for CUs. virtual std::optional<uint64_t> finalize(const size_t BufferSize); - /// Return buffer with all the entries in .debug_addr already writen out using - /// update(...). + /// Return buffer with all the entries in .debug_addr already written out + /// using update(...). virtual std::unique_ptr<AddressSectionBuffer> releaseBuffer() { return std::move(Buffer); } @@ -409,7 +409,7 @@ protected: std::mutex WriterMutex; std::unique_ptr<AddressSectionBuffer> Buffer; std::unique_ptr<raw_svector_ostream> AddressStream; - /// Used to track sections that were not modified so that they can be re-used. + /// Used to track sections that were not modified so that they can be reused. static DenseMap<uint64_t, uint64_t> UnmodifiedAddressOffsets; }; diff --git a/bolt/include/bolt/Core/DebugNames.h b/bolt/include/bolt/Core/DebugNames.h index cc4e13a..4ec49ca 100644 --- a/bolt/include/bolt/Core/DebugNames.h +++ b/bolt/include/bolt/Core/DebugNames.h @@ -65,7 +65,7 @@ public: void setCurrentUnit(DWARFUnit &Unit, const uint64_t UnitStartOffset); /// Emit Accelerator table. void emitAccelTable(); - /// Returns true if the table was crated. + /// Returns true if the table was created. bool isCreated() const { return NeedToCreate; } /// Returns buffer containing the accelerator table. std::unique_ptr<DebugBufferVector> releaseBuffer() { @@ -91,7 +91,7 @@ private: uint64_t CurrentUnitOffset = 0; const DWARFUnit *CurrentUnit = nullptr; std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap; - /// Contains a map of TU hashes to a Foreign TU indecies. + /// Contains a map of TU hashes to a Foreign TU indices. /// This is used to reduce the size of Foreign TU list since there could be /// multiple TUs with the same hash. DenseMap<uint64_t, uint32_t> TUHashToIndexMap; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 2772de7..d666c10 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -432,7 +432,7 @@ public: return Analysis->isConditionalBranch(Inst); } - /// Returns true if Inst is a condtional move instruction + /// Returns true if Inst is a conditional move instruction virtual bool isConditionalMove(const MCInst &Inst) const { llvm_unreachable("not implemented"); return false; @@ -1564,7 +1564,7 @@ public: } /// Get the default def_in and live_out registers for the function - /// Currently only used for the Stoke optimzation + /// Currently only used for the Stoke optimization virtual void getDefaultDefIn(BitVector &Regs) const { llvm_unreachable("not implemented"); } diff --git a/bolt/include/bolt/Passes/FrameAnalysis.h b/bolt/include/bolt/Passes/FrameAnalysis.h index d71c338..5ce85be 100644 --- a/bolt/include/bolt/Passes/FrameAnalysis.h +++ b/bolt/include/bolt/Passes/FrameAnalysis.h @@ -37,7 +37,7 @@ struct FrameIndexEntry { int64_t StackOffset; uint8_t Size; - /// If this is false, we will never atempt to remove or optimize this + /// If this is false, we will never attempt to remove or optimize this /// instruction. 
We just use it to keep track of stores we don't fully /// understand but we know it may write to a frame position. bool IsSimple; diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h index df3ea96..84da453 100644 --- a/bolt/include/bolt/Passes/LongJmp.h +++ b/bolt/include/bolt/Passes/LongJmp.h @@ -30,7 +30,7 @@ namespace bolt { /// 64-bit range, we guarantee it can reach any code location. /// class LongJmpPass : public BinaryFunctionPass { - /// Used to implement stub grouping (re-using a stub from one function into + /// Used to implement stub grouping (reusing a stub from one function into /// another) using StubTy = std::pair<uint64_t, BinaryBasicBlock *>; using StubGroupTy = SmallVector<StubTy, 4>; diff --git a/bolt/include/bolt/Passes/ProfileQualityStats.h b/bolt/include/bolt/Passes/ProfileQualityStats.h index 86fc88c..ee74b12 100644 --- a/bolt/include/bolt/Passes/ProfileQualityStats.h +++ b/bolt/include/bolt/Passes/ProfileQualityStats.h @@ -49,7 +49,7 @@ // aggregates the block gaps into 2 values for the function: "weighted" is the // weighted average of the block conservation gaps, where the weights depend on // each block's execution count and instruction count; "worst" is the worst -// (biggest) block gap acorss all basic blocks in the function with an execution +// (biggest) block gap across all basic blocks in the function with an execution // count of > 500. The pass then reports the 95th percentile of the weighted and // worst values of the 1000 functions in a single BOLT-INFO line. The smaller // the reported values are, the better the BOLT profile satisfies the function diff --git a/bolt/include/bolt/Passes/ReorderAlgorithm.h b/bolt/include/bolt/Passes/ReorderAlgorithm.h index 95d9e83..42bb333 100644 --- a/bolt/include/bolt/Passes/ReorderAlgorithm.h +++ b/bolt/include/bolt/Passes/ReorderAlgorithm.h @@ -26,7 +26,7 @@ namespace bolt { /// Objects of this class implement various basic block clustering algorithms. /// Basic block clusters are chains of basic blocks that should be laid out -/// in this order to maximize performace. These algorithms group basic blocks +/// in this order to maximize performance. These algorithms group basic blocks /// into clusters using execution profile data and various heuristics. class ClusterAlgorithm { public: diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h index 6242456..cab346b 100644 --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -128,7 +128,7 @@ private: CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer, GDBIndex &GDBIndexSection); - /// Process and write out CUs that are passsed in. + /// Process and write out CUs that are passed in. 
void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer, CUOffsetMap &CUMap, const std::list<DWARFUnit *> &CUs, diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 206d8eef..46b3372 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -844,7 +844,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, auto isSibling = std::bind(&BinaryContext::areRelatedFragments, this, &Function, std::placeholders::_1); assert(llvm::all_of(JT->Parents, isSibling) && - "cannot re-use jump table of a different function"); + "cannot reuse jump table of a different function"); (void)isSibling; if (opts::Verbosity > 2) { this->outs() << "BOLT-INFO: multiple fragments access the same jump table" @@ -860,7 +860,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, return JT->getFirstLabel(); } - // Re-use the existing symbol if possible. + // Reuse the existing symbol if possible. MCSymbol *JTLabel = nullptr; if (BinaryData *Object = getBinaryDataAtAddress(Address)) { if (!isInternalSymbolName(Object->getSymbol()->getName())) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 9687892..4dfd4ba 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3875,7 +3875,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { if (FunctionSymbol == Symbol) return 0; - // Check all secondary entries available as either basic blocks or lables. + // Check all secondary entries available as either basic blocks or labels. uint64_t NumEntries = 1; for (const BinaryBasicBlock *BB : BasicBlocks) { MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB); diff --git a/bolt/lib/Core/BinaryFunctionCallGraph.cpp b/bolt/lib/Core/BinaryFunctionCallGraph.cpp index f0c46a8..af22419 100644 --- a/bolt/lib/Core/BinaryFunctionCallGraph.cpp +++ b/bolt/lib/Core/BinaryFunctionCallGraph.cpp @@ -122,7 +122,7 @@ buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData, // create a node for a function unless it was the target of a call from // a hot block. The alternative would be to set the count to one or // accumulate the number of calls from the callsite into the function - // samples. Results from perfomance testing seem to favor the zero + // samples. Results from performance testing seem to favor the zero // count though, so I'm leaving it this way for now. return Cg.addNode(Function, Size, Function->getKnownExecutionCount()); } diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp index 7ce55f9..5b628f6 100644 --- a/bolt/lib/Core/DIEBuilder.cpp +++ b/bolt/lib/Core/DIEBuilder.cpp @@ -137,7 +137,7 @@ void DIEBuilder::updateReferences() { DIEInteger(NewAddr)); } - // Handling referenes in location expressions. + // Handling references in location expressions. for (LocWithReference &LocExpr : getState().LocWithReferencesToProcess) { SmallVector<uint8_t, 32> Buffer; DataExtractor Data(StringRef((const char *)LocExpr.BlockData.data(), @@ -336,7 +336,7 @@ void DIEBuilder::buildCompileUnits(const bool Init) { registerUnit(*DU, false); } - // Using DULIst since it can be modified by cross CU refrence resolution. + // Using DULIst since it can be modified by cross CU reference resolution. 
for (DWARFUnit *DU : getState().DUList) { if (DU->isTypeUnit()) continue; @@ -508,7 +508,7 @@ void DIEBuilder::finish() { UnitStartOffset += CurUnitInfo.UnitLength; }; // Computing offsets for .debug_types section. - // It's processed first when CU is registered so will be at the begginnig of + // It's processed first when CU is registered so will be at the beginning of // the vector. uint64_t TypeUnitStartOffset = 0; for (DWARFUnit *CU : getState().DUList) { diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index e05f28f..24a4c85 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -876,7 +876,7 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit, DIEValue StrListBaseAttrInfo = Die.findAttribute(dwarf::DW_AT_str_offsets_base); auto RetVal = ProcessedBaseOffsets.find(*Val); - // Handling re-use of str-offsets section. + // Handling reuse of str-offsets section. if (RetVal == ProcessedBaseOffsets.end() || StrOffsetSectionWasModified) { initialize(Unit); // Update String Offsets that were modified. @@ -1167,7 +1167,7 @@ void DwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params, // For functions that we do not modify we output them as raw data. // Re-constructing .debug_line_str so that offsets are correct for those // debug line tables. -// Bonus is that when we output a final binary we can re-use .debug_line_str +// Bonus is that when we output a final binary we can reuse .debug_line_str // section. So we don't have to do the SHF_ALLOC trick we did with // .debug_line. static void parseAndPopulateDebugLineStr(BinarySection &LineStrSection, diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index a9d98a6..6be2c5a 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -55,7 +55,7 @@ DWARF5AcceleratorTable::DWARF5AcceleratorTable( llvm::hash_value(llvm::StringRef(CStr)), StrOffset); if (!R.second) BC.errs() - << "BOLT-WARNING: [internal-dwarf-error]: collision occured on " + << "BOLT-WARNING: [internal-dwarf-error]: collision occurred on " << CStr << " at offset : 0x" << Twine::utohexstr(StrOffset) << ". Previous string offset is: 0x" << Twine::utohexstr(R.first->second) << ".\n"; @@ -86,7 +86,7 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit, if (Unit.isTypeUnit()) { if (DWOID) { // We adding an entry for a DWO TU. The DWO CU might not have any entries, - // so need to add it to the list pre-emptively. + // so need to add it to the list preemptively. 
auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()}); if (Iter.second) CUList.push_back(BADCUOFFSET); diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp index c3ddeda..5d21bdb 100644 --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -60,7 +60,7 @@ namespace llvm { namespace bolt { // Align function to the specified byte-boundary (typically, 64) offsetting -// the fuction by not more than the corresponding value +// the function by not more than the corresponding value static void alignMaxBytes(BinaryFunction &Function) { Function.setAlignment(opts::AlignFunctions); Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); @@ -68,7 +68,7 @@ static void alignMaxBytes(BinaryFunction &Function) { } // Align function to the specified byte-boundary (typically, 64) offsetting -// the fuction by not more than the minimum over +// the function by not more than the minimum over // -- the size of the function // -- the specified number of bytes static void alignCompact(BinaryFunction &Function, diff --git a/bolt/lib/Passes/FrameAnalysis.cpp b/bolt/lib/Passes/FrameAnalysis.cpp index f568039..0b26da3 100644 --- a/bolt/lib/Passes/FrameAnalysis.cpp +++ b/bolt/lib/Passes/FrameAnalysis.cpp @@ -198,7 +198,7 @@ public: if (CFIStack.empty()) dbgs() << "Assertion is about to fail: " << BF.getPrintName() << "\n"; assert(!CFIStack.empty() && "Corrupt CFI stack"); - std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); + std::pair<int64_t, uint16_t> Elem = CFIStack.top(); CFIStack.pop(); CfaOffset = Elem.first; CfaReg = Elem.second; diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp index 60349f1..0859cd2 100644 --- a/bolt/lib/Passes/RegReAssign.cpp +++ b/bolt/lib/Passes/RegReAssign.cpp @@ -145,7 +145,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) { const bool CannotUseREX = BC.MIB->cannotUseREX(Inst); const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); - // Disallow substituitions involving regs in implicit uses lists + // Disallow substitutions involving regs in implicit uses lists for (MCPhysReg ImplicitUse : Desc.implicit_uses()) { const size_t RegEC = BC.MIB->getAliases(ImplicitUse, false).find_first(); @@ -153,7 +153,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) { std::numeric_limits<decltype(RegScore)::value_type>::min(); } - // Disallow substituitions involving regs in implicit defs lists + // Disallow substitutions involving regs in implicit defs lists for (MCPhysReg ImplicitDef : Desc.implicit_defs()) { const size_t RegEC = BC.MIB->getAliases(ImplicitDef, false).find_first(); @@ -174,7 +174,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) { if (RegEC == 0) continue; - // Disallow substituitions involving regs in instrs that cannot use REX + // Disallow substitutions involving regs in instrs that cannot use REX // The relationship of X86 registers is shown in the diagram. BL and BH // do not have a direct alias relationship. 
However, if the BH register // cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp index 4ea60f3..fe342cc 100644 --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -402,7 +402,7 @@ void StackLayoutModifier::classifyCFIs() { break; case MCCFIInstruction::OpRestoreState: { assert(!CFIStack.empty() && "Corrupt CFI stack"); - std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); + std::pair<int64_t, uint16_t> Elem = CFIStack.top(); CFIStack.pop(); CfaOffset = Elem.first; CfaReg = Elem.second; diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp index eab669b..66a373a 100644 --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -386,7 +386,7 @@ private: } /// Compute sum of scores over jumps within \p BlockOrder given \p SplitIndex. - /// Increament Score.LocalScore in place by the sum. + /// Increment Score.LocalScore in place by the sum. void computeJumpScore(const BasicBlockOrder &BlockOrder, const size_t SplitIndex, SplitScore &Score) { @@ -413,7 +413,7 @@ private: } /// Compute sum of scores over calls originated in the current function - /// given \p SplitIndex. Increament Score.LocalScore in place by the sum. + /// given \p SplitIndex. Increment Score.LocalScore in place by the sum. void computeLocalCallScore(const BasicBlockOrder &BlockOrder, const size_t SplitIndex, SplitScore &Score) { if (opts::CallScale == 0) @@ -455,7 +455,7 @@ private: } /// Compute sum of splitting scores for cover calls of the input function. - /// Increament Score.CoverCallScore in place by the sum. + /// Increment Score.CoverCallScore in place by the sum. void computeCoverCallScore(const BasicBlockOrder &BlockOrder, const size_t SplitIndex, const std::vector<CallInfo> &CoverCalls, @@ -467,7 +467,7 @@ private: assert(CI.Length >= Score.HotSizeReduction && "Length of cover calls must exceed reduced size of hot fragment."); // Compute the new length of the call, which is shorter than the original - // one by the size of the splitted fragment minus the total size increase. + // one by the size of the split fragment minus the total size increase. const size_t NewCallLength = CI.Length - Score.HotSizeReduction; Score.CoverCallScore += computeCallScore(CI.Count, NewCallLength); } @@ -502,12 +502,12 @@ private: // First part of LocalScore is the sum over call edges originated in the // input function. These edges can get shorter or longer depending on - // SplitIndex. Score.LocalScore is increamented in place. + // SplitIndex. Score.LocalScore is incremented in place. computeLocalCallScore(BlockOrder, SplitIndex, Score); // Second part of LocalScore is the sum over jump edges with src basic block // and dst basic block in the current function. Score.LocalScore is - // increamented in place. + // incremented in place. computeJumpScore(BlockOrder, SplitIndex, Score); // Compute CoverCallScore and store in Score in place. 
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index afe2421..277d4bb 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -907,7 +907,7 @@ ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) { StringRef NumStr = NumStrRes.get(); uint64_t Num; if (NumStr.getAsInteger(16, Num)) { - reportError("expected hexidecimal number"); + reportError("expected hexadecimal number"); Diag << "Found: " << NumStr << "\n"; return make_error_code(llvm::errc::io_error); } diff --git a/bolt/lib/Rewrite/BuildIDRewriter.cpp b/bolt/lib/Rewrite/BuildIDRewriter.cpp index d50416f..706a3d0 100644 --- a/bolt/lib/Rewrite/BuildIDRewriter.cpp +++ b/bolt/lib/Rewrite/BuildIDRewriter.cpp @@ -48,7 +48,7 @@ public: }; Error BuildIDRewriter::sectionInitializer() { - // Typically, build ID will reside in .note.gnu.build-id section. Howerver, + // Typically, build ID will reside in .note.gnu.build-id section. However, // a linker script can change the section name and such is the case with // the Linux kernel. Hence, we iterate over all note sections. for (BinarySection &NoteSection : BC.sections()) { diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 7366d2a..a7a4b66 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -1723,7 +1723,7 @@ StringRef getSectionName(const SectionRef &Section) { return Name; } -// Exctracts an appropriate slice if input is DWP. +// Extracts an appropriate slice if input is DWP. // Applies patches or overwrites the section. std::optional<StringRef> updateDebugData( DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents, @@ -1759,7 +1759,7 @@ std::optional<StringRef> updateDebugData( auto Iter = OverridenSections.find(Kind); if (Iter == OverridenSections.end()) { errs() - << "BOLT-WARNING: [internal-dwarf-error]: Could not find overriden " + << "BOLT-WARNING: [internal-dwarf-error]: Could not find overridden " "section for: " << Twine::utohexstr(DWOId) << ".\n"; return std::nullopt; @@ -1991,7 +1991,7 @@ void DWARFRewriter::convertToRangesPatchDebugInfo( } } - // HighPC was conveted into DW_AT_ranges. + // HighPC was converted into DW_AT_ranges. // For DWARF5 we only access ranges through index. DIEBldr.replaceValue(&Die, HighPCAttrInfo.getAttribute(), dwarf::DW_AT_ranges, diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index ee021fe..947d899 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -308,7 +308,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { Contents.append(OSE.str().begin(), OSE.str().end()); }; - // Emit indiviual pseudo probes in a inline tree node + // Emit individual pseudo probes in a inline tree node // Probe index, type, attribute, address type and address are encoded // Address of the first probe is absolute. 
// Other probes' address are represented by delta diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a0e7995..9580163 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1087,7 +1087,7 @@ void RewriteInstance::discoverFileObjects() { if (SymbolAddress == Section->getAddress() + Section->getSize()) { assert(SymbolSize == 0 && - "unexpect non-zero sized symbol at end of section"); + "unexpected non-zero sized symbol at end of section"); LLVM_DEBUG( dbgs() << "BOLT-DEBUG: rejecting as symbol points to end of its section\n"); @@ -2440,7 +2440,7 @@ void RewriteInstance::processDynamicRelocations() { } // The rest of dynamic relocations - DT_RELA. - // The static executable might have .rela.dyn secion and not have PT_DYNAMIC + // The static executable might have .rela.dyn section and not have PT_DYNAMIC if (!DynamicRelocationsSize && BC->IsStaticExecutable) { ErrorOr<BinarySection &> DynamicRelSectionOrErr = BC->getUniqueSectionByName(getRelaDynSectionName()); @@ -5017,7 +5017,7 @@ void RewriteInstance::updateELFSymbolTable( if (!Section) return false; - // Remove the section symbol iif the corresponding section was stripped. + // Remove the section symbol if the corresponding section was stripped. if (Symbol.getType() == ELF::STT_SECTION) { if (!getNewSectionIndex(Symbol.st_shndx)) return true; diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index df4f421..6954cb2 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1296,7 +1296,7 @@ public: AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::getArithExtendType(OperandExtension); if (ShiftVal != 2) { - // TODO: Handle the patten where ShiftVal != 2. + // TODO: Handle the pattern where ShiftVal != 2. // The following code sequence below has no shift amount, // the range could be 0 to 4. // The pattern comes from libc, it occurs when the binary is static. @@ -1626,7 +1626,7 @@ public: int getUncondBranchEncodingSize() const override { return 28; } // This helper function creates the snippet of code that compares a register - // RegNo with an immedaite Imm, and jumps to Target if they are equal. + // RegNo with an immediate Imm, and jumps to Target if they are equal. // cmp RegNo, #Imm // b.eq Target // where cmp is an alias for subs, which results in the code below: @@ -1648,7 +1648,7 @@ public: } // This helper function creates the snippet of code that compares a register - // RegNo with an immedaite Imm, and jumps to Target if they are not equal. + // RegNo with an immediate Imm, and jumps to Target if they are not equal. // cmp RegNo, #Imm // b.ne Target // where cmp is an alias for subs, which results in the code below: diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 9026a9d..5fca5e8 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -2715,7 +2715,7 @@ public: bool FoundOne = false; - // Iterate only through src operands that arent also dest operands + // Iterate only through src operands that aren't also dest operands for (unsigned Index = InstDesc.getNumDefs() + (HasLHS ? 
1 : 0), E = InstDesc.getNumOperands(); Index != E; ++Index) { diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 095612a..5be04d2 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -285,7 +285,7 @@ cl::opt<bool> TimeRewrite("time-rewrite", cl::opt<bool> UseOldText( "use-old-text", - cl::desc("re-use space in old .text if possible (relocation mode)"), + cl::desc("reuse space in old .text if possible (relocation mode)"), cl::cat(BoltCategory)); cl::opt<bool> UpdateDebugSections( diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index 672b042..de89630 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -24,7 +24,7 @@ {} #endif -// Function constains trampoline to _start, +// Function constrains trampoline to _start, // so we can resume regular execution of the function that we hooked. extern void __bolt_hugify_start_program(); diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index 1f54a50..f586db2 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -214,7 +214,7 @@ private: /// __bolt_instr_setup, our initialization routine. BumpPtrAllocator *GlobalAlloc; -// Base address which we substract from recorded PC values when searching for +// Base address which we subtract from recorded PC values when searching for // indirect call description entries. Needed because indCall descriptions are // mapped read-only and contain static addresses. Initialized in // __bolt_instr_setup. @@ -261,7 +261,7 @@ struct SimpleHashTableEntryBase { // Currently we have to do it the ugly way because // we want every message to be printed atomically via a single call to // __write. If we use reportNumber() and others nultiple times, we'll get - // garbage in mulithreaded environment + // garbage in multithreaded environment char Buf[BufSize]; char *Ptr = Buf; Ptr = intToStr(Ptr, __getpid(), 10); @@ -1585,7 +1585,7 @@ __bolt_instr_data_dump(int FD, const char *LibPath = nullptr, /// at user-specified intervals void watchProcess() { timespec ts, rem; - uint64_t Ellapsed = 0ull; + uint64_t Elapsed = 0ull; int FD = openProfile(); uint64_t ppid; if (__bolt_instr_wait_forks) { @@ -1615,10 +1615,10 @@ void watchProcess() { break; } - if (++Ellapsed < __bolt_instr_sleep_time) + if (++Elapsed < __bolt_instr_sleep_time) continue; - Ellapsed = 0; + Elapsed = 0; __bolt_instr_data_dump(FD); if (__bolt_instr_no_counters_clear == false) __bolt_instr_clear_counters(); diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h index 77c9cfc..b1d04f9 100644 --- a/bolt/runtime/sys_aarch64.h +++ b/bolt/runtime/sys_aarch64.h @@ -41,7 +41,7 @@ // Anonymous namespace covering everything but our library entry point namespace { -// Get the difference between runtime addrress of .text section and +// Get the difference between runtime address of .text section and // static address in section header table. Can be extracted from arbitrary // pc value recorded at runtime to get the corresponding static address, which // in turn can be used to search for indirect call description. 
Needed because diff --git a/bolt/runtime/sys_riscv64.h b/bolt/runtime/sys_riscv64.h index 00a21e4..442fa2e0 100644 --- a/bolt/runtime/sys_riscv64.h +++ b/bolt/runtime/sys_riscv64.h @@ -105,7 +105,7 @@ // Anonymous namespace covering everything but our library entry point namespace { -// Get the difference between runtime addrress of .text section and +// Get the difference between runtime address of .text section and // static address in section header table. Can be extracted from arbitrary // pc value recorded at runtime to get the corresponding static address, which // in turn can be used to search for indirect call description. Needed because diff --git a/bolt/runtime/sys_x86_64.h b/bolt/runtime/sys_x86_64.h index ca2c693..933e939 100644 --- a/bolt/runtime/sys_x86_64.h +++ b/bolt/runtime/sys_x86_64.h @@ -40,7 +40,7 @@ namespace { -// Get the difference between runtime addrress of .text section and +// Get the difference between runtime address of .text section and // static address in section header table. Can be extracted from arbitrary // pc value recorded at runtime to get the corresponding static address, which // in turn can be used to search for indirect call description. Needed because @@ -171,8 +171,9 @@ uint64_t __exit(uint64_t code) { #if !defined(__APPLE__) // We use a stack-allocated buffer for string manipulation in many pieces of // this code, including the code that prints each line of the fdata file. This -// buffer needs to accomodate large function names, but shouldn't be arbitrarily -// large (dynamically allocated) for simplicity of our memory space usage. +// buffer needs to accommodate large function names, but shouldn't be +// arbitrarily large (dynamically allocated) for simplicity of our memory space +// usage. // Declare some syscall wrappers we use throughout this code to avoid linking // against system libc. diff --git a/bolt/test/AArch64/constant-island-alignment.s b/bolt/test/AArch64/constant-island-alignment.s index 957c470..99fe733 100644 --- a/bolt/test/AArch64/constant-island-alignment.s +++ b/bolt/test/AArch64/constant-island-alignment.s @@ -3,7 +3,7 @@ # RUN: split-file %s %t // For the first test case, in case the nop before .Lci will be removed -// the pointer to exit function won't be alinged and the test will fail. +// the pointer to exit function won't be aligned and the test will fail. # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ # RUN: %t/xword_align.s -o %t_xa.o diff --git a/bolt/test/AArch64/ifunc.test b/bolt/test/AArch64/ifunc.test index 3da42c6..15ecc35 100644 --- a/bolt/test/AArch64/ifunc.test +++ b/bolt/test/AArch64/ifunc.test @@ -9,7 +9,7 @@ // RUN: FileCheck --check-prefix=REL_CHECK %s // Non-pie static executable doesn't generate PT_DYNAMIC, check relocation -// is readed successfully and IPLT trampoline has been identified by bolt. +// is read successfully and IPLT trampoline has been identified by bolt. // RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \ // RUN: -o %t.O3_nopie.exe -Wl,-q // RUN: llvm-readelf -l %t.O3_nopie.exe | \ diff --git a/bolt/test/X86/cdsplit-call-scale.s b/bolt/test/X86/cdsplit-call-scale.s index 66f3003..caa11b6 100644 --- a/bolt/test/X86/cdsplit-call-scale.s +++ b/bolt/test/X86/cdsplit-call-scale.s @@ -1,8 +1,8 @@ ## Test the control of aggressiveness of 3-way splitting by -call-scale. -## When -call-scale=0.0, the tested function is 2-way splitted. 
-## When -call-scale=1.0, the tested function is 3-way splitted with 5 blocks +## When -call-scale=0.0, the tested function is 2-way split. +## When -call-scale=1.0, the tested function is 3-way split with 5 blocks ## in warm because of the increased benefit of shortening the call edges. -## When -call-scale=1000.0, the tested function is still 3-way splitted with +## When -call-scale=1000.0, the tested function is still 3-way split with ## 5 blocks in warm because cdsplit does not allow hot-warm splitting to break ## a fall through branch from a basic block to its most likely successor. diff --git a/bolt/test/X86/dwarf5-two-cu-str-offset-table.test b/bolt/test/X86/dwarf5-two-cu-str-offset-table.test index e59664e..488635b 100644 --- a/bolt/test/X86/dwarf5-two-cu-str-offset-table.test +++ b/bolt/test/X86/dwarf5-two-cu-str-offset-table.test @@ -8,7 +8,7 @@ # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt >> %t.txt # RUN: cat %t.txt | FileCheck --check-prefix=CHECK %s -## This test checks we correclty re-renerate .debug_str_offsets. +## This test checks we correctly re-renerate .debug_str_offsets. # CHECK: .debug_str_offsets contents # CHECK-NEXT: 0x00000000: Contribution size = 52, Format = DWARF32, Version = 5 diff --git a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test index dc6255f..0cb62ed 100644 --- a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test +++ b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test @@ -7,7 +7,7 @@ # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe | FileCheck -check-prefix=PRE-BOLT %s # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt | FileCheck -check-prefix=POST-BOLT %s -## This test checks we correclty re-generate .debug_str_offsets when there are type units that have an offset not shared with CU. +## This test checks we correctly re-generate .debug_str_offsets when there are type units that have an offset not shared with CU. # PRE-BOLT: .debug_str_offsets contents # PRE-BOLT-NEXT: Contribution size = 24, Format = DWARF32, Version = 5 diff --git a/bolt/test/X86/jt-symbol-disambiguation-3.s b/bolt/test/X86/jt-symbol-disambiguation-3.s index 22b34ce..c06fd3b 100644 --- a/bolt/test/X86/jt-symbol-disambiguation-3.s +++ b/bolt/test/X86/jt-symbol-disambiguation-3.s @@ -1,6 +1,6 @@ ## In this test case, we reproduce the behavior seen in gcc where the ## base address of a jump table is decremented by some number and ends up -## at the exact addess of a jump table from another function. After +## at the exact address of a jump table from another function. After ## linking, the instruction references another jump table and that ## confuses BOLT. ## We repro here the following issue: @@ -28,7 +28,7 @@ # ---- # Func foo contains a jump table whose start is colocated with a # jump table reference in another function. However, the other function -# does not use the first entries of it and is merely doing arithmetics +# does not use the first entries of it and is merely doing arithmetic # to save the creation of unused first entries. # ---- .globl foo diff --git a/bolt/test/X86/split-landing-pad.s b/bolt/test/X86/split-landing-pad.s index 681f14f..149193d 100644 --- a/bolt/test/X86/split-landing-pad.s +++ b/bolt/test/X86/split-landing-pad.s @@ -1,5 +1,5 @@ ## This test reproduces the case where C++ exception handling is used and split -## function optimization is enabled. 
In particular, function foo is splitted +## function optimization is enabled. In particular, function foo is split ## to two fragments: ## foo: contains 2 try blocks, which invokes bar to throw exception ## foo.cold.1: contains 2 corresponding catch blocks (landing pad) diff --git a/bolt/test/runtime/X86/asm-dump.c b/bolt/test/runtime/X86/asm-dump.c index 7656fda..fa0de9b 100644 --- a/bolt/test/runtime/X86/asm-dump.c +++ b/bolt/test/runtime/X86/asm-dump.c @@ -30,7 +30,7 @@ * Reconstruct fdata * RUN: link_fdata %t/main.s %t.o %t.fdata.reconst * - * XXX: reenable once dumping data is supported + * XXX: re-enable once dumping data is supported * Check if reoptimized file produces the same results * dontrun: %t.exe.reopt > %t.result.reopt * dontrun: cmp %t.result %t.result.reopt diff --git a/bolt/test/runtime/wait_file.sh b/bolt/test/runtime/wait_file.sh index 42d4c5b..7346476 100644 --- a/bolt/test/runtime/wait_file.sh +++ b/bolt/test/runtime/wait_file.sh @@ -12,7 +12,7 @@ check_file() { fuser -s "$file" local ret=$? - if [ $ret -eq 1 ]; then # noone has file open + if [ $ret -eq 1 ]; then # no one has file open return 0 fi if [ $ret -eq 0 ]; then # file open by some processes diff --git a/bolt/utils/bughunter.sh b/bolt/utils/bughunter.sh index c5dddc4..d5ce059 100755 --- a/bolt/utils/bughunter.sh +++ b/bolt/utils/bughunter.sh @@ -28,7 +28,7 @@ # # TIMEOUT_OR_CMD - optional timeout or command on optimized binary command # if the value is a number with an optional trailing letter -# [smhd] it is considered a paramter to "timeout", +# [smhd] it is considered a parameter to "timeout", # otherwise it's a shell command that wraps the optimized # binary command. # diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index 799c64b..5d5388e 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -1040,8 +1040,8 @@ private: if (auto *S = N.get<Stmt>()) return refInStmt(S, Resolver); if (auto *NNSL = N.get<NestedNameSpecifierLoc>()) { - if (auto TL = NNSL->getAsTypeLoc()) - return refInTypeLoc(NNSL->getAsTypeLoc(), Resolver); + if (TypeLoc TL = NNSL->getAsTypeLoc()) + return refInTypeLoc(TL, Resolver); // (!) 'DeclRelation::Alias' ensures we do not lose namespace aliases. NestedNameSpecifierLoc Qualifier = NNSL->getAsNamespaceAndPrefix().Prefix; SourceLocation NameLoc = NNSL->getLocalBeginLoc(); diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index db2b0f6..edb872c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -435,6 +435,7 @@ Bug Fixes to C++ Support - Suppress ``-Wdeprecated-declarations`` in implicitly generated functions. (#GH147293) - Fix a crash when deleting a pointer to an incomplete array (#GH150359). - Fixed a mismatched lambda scope bug when propagating up ``consteval`` within nested lambdas. (#GH145776) +- Disallow immediate escalation in destructors. (#GH109096) - Fix an assertion failure when expression in assumption attribute (``[[assume(expr)]]``) creates temporary objects. 
- Fix the dynamic_cast to final class optimization to correctly handle diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 2d10489..01f0b27 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2780,6 +2780,25 @@ usual build cycle when using sample profilers for optimization: /clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names ^ code.cc /Fe:code -fuse-ld=lld /link /debug:dwarf + [OPTIONAL] Pseudo instrumentation can be used as the anchor for accurate + profile mapping with the ``-fpseudo-probe-for-profiling`` option. + + On Linux: + + .. code-block:: console + + $ clang++ -O2 -gline-tables-only \ + -fpseudo-probe-for-profiling -funique-internal-linkage-names \ + code.cc -o code + + On Windows: + + .. code-block:: winbatch + + > clang-cl /O2 -gdwarf -gline-tables-only ^ + -fpseudo-probe-for-profiling /clang:-funique-internal-linkage-names ^ + code.cc /Fe:code -fuse-ld=lld /link /debug:dwarf + .. note:: :ref:`-funique-internal-linkage-names <funique_internal_linkage_names>` @@ -2881,6 +2900,25 @@ usual build cycle when using sample profilers for optimization: /clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names ^ -fprofile-sample-use=code.prof code.cc /Fe:code + [OPTIONAL] Pseudo instrumentation can be used as the anchor for accurate + profile mapping with the ``-fpseudo-probe-for-profiling`` option. + + On Linux: + + .. code-block:: console + + $ clang++ -O2 \ + -fpseudo-probe-for-profiling -funique-internal-linkage-names \ + -fprofile-sample-use=code.prof code.cc -o code + + On Windows: + + .. code-block:: winbatch + + > clang-cl /O2 ^ + -fpseudo-probe-for-profiling /clang:-funique-internal-linkage-names ^ + -fprofile-sample-use=code.prof code.cc /Fe:code + [OPTIONAL] Sampling-based profiles can have inaccuracies or missing block/ edge counters. The profile inference algorithm (profi) can be used to infer missing blocks and edge counts, and improve the quality of profile data. diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index 1e351f3..83f2b18 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -1301,46 +1301,25 @@ struct OpenACCReductionRecipe { // AST), or in a separate collection when being semantically analyzed. llvm::ArrayRef<CombinerRecipe> CombinerRecipes; + bool isSet() const { return AllocaDecl; } + +private: + friend class OpenACCReductionClause; OpenACCReductionRecipe(VarDecl *A, llvm::ArrayRef<CombinerRecipe> Combiners) : AllocaDecl(A), CombinerRecipes(Combiners) {} - - bool isSet() const { return AllocaDecl; } }; // A version of the above that is used for semantic analysis, at a time before // the OpenACCReductionClause node has been created. This one has storage for // the CombinerRecipe, since Trailing storage for it doesn't exist yet. 
-struct OpenACCReductionRecipeWithStorage : OpenACCReductionRecipe { -private: - llvm::SmallVector<CombinerRecipe, 1> CombinerRecipeStorage; - -public: - OpenACCReductionRecipeWithStorage(VarDecl *A, - llvm::ArrayRef<CombinerRecipe> Combiners) - : OpenACCReductionRecipe(A, {}), CombinerRecipeStorage(Combiners) { - CombinerRecipes = CombinerRecipeStorage; - } +struct OpenACCReductionRecipeWithStorage { + VarDecl *AllocaDecl; + llvm::SmallVector<OpenACCReductionRecipe::CombinerRecipe, 1> CombinerRecipes; OpenACCReductionRecipeWithStorage( - const OpenACCReductionRecipeWithStorage &Other) - : OpenACCReductionRecipe(Other), - CombinerRecipeStorage(Other.CombinerRecipeStorage) { - CombinerRecipes = CombinerRecipeStorage; - } - - OpenACCReductionRecipeWithStorage(OpenACCReductionRecipeWithStorage &&Other) - : OpenACCReductionRecipe(std::move(Other)), - CombinerRecipeStorage(std::move(Other.CombinerRecipeStorage)) { - CombinerRecipes = CombinerRecipeStorage; - } - - // There is no real problem implementing these, we just have to make sure the - // array-ref this inherits from stays in sync. But as we don't need it at the - // moment, make sure we don't accidentially call these. - OpenACCReductionRecipeWithStorage & - operator=(OpenACCReductionRecipeWithStorage &&) = delete; - OpenACCReductionRecipeWithStorage & - operator=(const OpenACCReductionRecipeWithStorage &) = delete; + VarDecl *A, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> Combiners) + : AllocaDecl(A), CombinerRecipes(Combiners) {} static OpenACCReductionRecipeWithStorage Empty() { return OpenACCReductionRecipeWithStorage(/*AllocaDecl=*/nullptr, {}); diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b197d81..006a453 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -110,19 +110,20 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "sse3" in { - foreach Op = ["addsub", "hadd", "hsub"] in { + foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; } } - let Features = "ssse3" in { - foreach Op = ["phadd", "phsub"] in { - def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + foreach Op = ["hadd", "hsub"] in { + def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; } + } + let Features = "ssse3" in { def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; @@ -137,7 +138,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { // AVX let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { - foreach Op = ["addsub", "hadd", "hsub", "max", "min"] in { + foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : 
X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } @@ -316,6 +317,14 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">; } +let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + foreach Op = ["phadd", "phsub"] in { + def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + } +} + let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; @@ -515,6 +524,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; + + foreach Op = ["hadd", "hsub"] in { + def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; + } } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { @@ -592,12 +606,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; - def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; - def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; - def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; @@ -666,6 +675,13 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; + def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def phsubd256 : X86Builtin<"_Vector<8, 
int>(_Vector<8, int>, _Vector<8, int>)">;
+  def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+
   def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
   def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
   def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -2121,24 +2137,18 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
   def movdqa64store256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
 }
 
-let Features = "avx512ifma", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512ifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
   def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
 }
 
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
 }
 
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
   def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
 }
 
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index ceb16174..ea73ed9 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1211,6 +1211,25 @@ public:
       TiedOperand = N;
       // Don't copy Name or constraint string.
     }
+
+    // For output operand constraints, the target can set bounds to indicate
+    // that the result value is guaranteed to fall within a certain range.
+    // This will cause corresponding assertions to be emitted that will allow
+    // for potential optimization based on that guarantee.
+    //
+    // NOTE: This re-uses the `ImmRange` fields to store the range, which are
+    // otherwise unused for constraint types used for output operands.
+    void setOutputOperandBounds(unsigned Min, unsigned Max) {
+      ImmRange.Min = Min;
+      ImmRange.Max = Max;
+      ImmRange.isConstrained = true;
+    }
+    std::optional<std::pair<unsigned, unsigned>>
+    getOutputOperandBounds() const {
+      return ImmRange.isConstrained
+                 ? std::make_pair(ImmRange.Min, ImmRange.Max)
+                 : std::optional<std::pair<unsigned, unsigned>>();
+    }
   };
 
   /// Validate register name used for global register variables.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
index bb62223..610e349 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -814,6 +814,14 @@ def CIR_GlobalCtorAttr : CIR_GlobalCtorDtor<"Ctor", "ctor"> {
   }];
 }
 
+def CIR_GlobalDtorAttr : CIR_GlobalCtorDtor<"Dtor", "dtor"> {
+  let summary = "Marks a function as a global destructor";
+  let description = [{
+    Marks a function as a global destructor in the module dtors list.
+    The function will be executed when the module is unloaded.
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // BitfieldInfoAttr
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
index feb08d60..e915371 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
@@ -43,6 +43,7 @@ def CIR_Dialect : Dialect {
     static llvm::StringRef getSideEffectAttrName() { return "side_effect"; }
     static llvm::StringRef getModuleLevelAsmAttrName() { return "cir.module_asm"; }
     static llvm::StringRef getGlobalCtorsAttrName() { return "cir.global_ctors"; }
+    static llvm::StringRef getGlobalDtorsAttrName() { return "cir.global_dtors"; }
 
     void registerAttributes();
     void registerTypes();
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 27fe0cc..baab156 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -89,6 +89,19 @@ class CIR_Op<string mnemonic, list<Trait> traits = []> :
 }
 
 //===----------------------------------------------------------------------===//
+// CIR Operation Traits
+//===----------------------------------------------------------------------===//
+
+class HasAtMostOneOfAttrsPred<list<string> names> :
+  CPred<!foldl("0", names, acc, name, acc # " + (" # name # " ? 1 : 0)")
+        # " <= 1">;
+
+class HasAtMostOneOfAttrs<list<string> names> : PredOpTrait<
+  "has only one of the optional attributes: " # !interleave(names, ", "),
+  HasAtMostOneOfAttrsPred<!foreach(name, names, "$" # name)>
+>;
+
+//===----------------------------------------------------------------------===//
 // CastOp
 //===----------------------------------------------------------------------===//
 
@@ -2422,9 +2435,17 @@ def CIR_GetMemberOp : CIR_Op<"get_member"> {
 // TODO(CIR): FuncOp is still a tiny shell of what it will become. Many more
 // properties and attributes will be added as upstreaming continues.
 
+def CIR_OptionalPriorityAttr : OptionalAttr<
+  DefaultValuedAttr<
+    ConfinedAttr<I32Attr, [IntMinValue<101>, IntMaxValue<65535>]>,
+    "65535"
+  >
+>;
+
 def CIR_FuncOp : CIR_Op<"func", [
   AutomaticAllocationScope, CallableOpInterface, FunctionOpInterface,
   DeclareOpInterfaceMethods<CIRGlobalValueInterface>,
+  HasAtMostOneOfAttrs<["global_ctor_priority", "global_dtor_priority"]>,
   IsolatedFromAbove
 ]> {
   let summary = "Declare or define a function";
@@ -2449,6 +2470,12 @@
   without a prototype and, consequently, may contain calls with invalid
   arguments and undefined behavior.
 
+  The `global_ctor` keyword indicates that a function should execute before the
+  `main()` function, as specified by `__attribute__((constructor))`. An
+  execution priority can also be specified with `global_ctor(<priority>)`.
+  Similarly, for global destructors, both `global_dtor` and
+  `global_dtor(<priority>)` are available.
+
   Example:
 
   ```mlir
@@ -2487,7 +2514,9 @@ def CIR_FuncOp : CIR_Op<"func", [
                        UnitAttr:$comdat,
                        OptionalAttr<DictArrayAttr>:$arg_attrs,
                        OptionalAttr<DictArrayAttr>:$res_attrs,
-                       OptionalAttr<FlatSymbolRefAttr>:$aliasee);
+                       OptionalAttr<FlatSymbolRefAttr>:$aliasee,
+                       CIR_OptionalPriorityAttr:$global_ctor_priority,
+                       CIR_OptionalPriorityAttr:$global_dtor_priority);
 
   let regions = (region AnyRegion:$body);
 
@@ -4300,7 +4329,7 @@ def CIR_AllocExceptionOp : CIR_Op<"alloc.exception"> {
 // Atomic operations
 //===----------------------------------------------------------------------===//
 
-def CIR_AtomicXchg : CIR_Op<"atomic.xchg", [
+def CIR_AtomicXchgOp : CIR_Op<"atomic.xchg", [
   AllTypesMatch<["result", "val"]>,
   TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
                  "ptr", "val", "mlir::cast<cir::PointerType>($_self).getPointee()">
@@ -4318,9 +4347,7 @@
   Example:
 
   ```mlir
-    %res = cir.atomic.xchg(%ptr : !cir.ptr<!u64i>,
-                           %val : !u64i,
-                           seq_cst) : !u64i
+    %res = cir.atomic.xchg seq_cst %ptr, %val : !cir.ptr<!u64i> -> !u64i
   ```
   }];
 
@@ -4335,12 +4362,16 @@
   let assemblyFormat = [{
     $mem_order (`volatile` $is_volatile^)?
     $ptr `,` $val
-    `:` qualified(type($ptr)) `->` type($result) attr-dict
+    `:` functional-type(operands, results) attr-dict
   }];
 }
 
-def CIR_AtomicCmpXchg : CIR_Op<"atomic.cmpxchg", [
-  AllTypesMatch<["old", "expected", "desired"]>
+def CIR_AtomicCmpXchgOp : CIR_Op<"atomic.cmpxchg", [
+  AllTypesMatch<["old", "expected", "desired"]>,
+  TypesMatchWith<"type of 'expected' must match the pointee type of 'ptr'",
+                 "ptr", "expected", "mlir::cast<cir::PointerType>($_self).getPointee()">,
+  TypesMatchWith<"type of 'desired' must match the pointee type of 'ptr'",
+                 "ptr", "desired", "mlir::cast<cir::PointerType>($_self).getPointee()">
 ]> {
   let summary = "Atomic compare and exchange";
   let description = [{
@@ -4373,12 +4404,9 @@
   Example:
 
   ```mlir
-    %old, %success = cir.atomic.cmpxchg(%ptr : !cir.ptr<!u64i>,
-                                        %expected : !u64i,
-                                        %desired : !u64i,
-                                        success = seq_cst,
-                                        failure = seq_cst) weak
-                                        : (!u64i, !cir.bool)
+    %old, %success = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire)
+                     %ptr, %expected, %desired
+                     : (!cir.ptr<!u64i>, !u64i, !u64i) -> (!u64i, !cir.bool)
   ```
   }];
 
   let results = (outs CIR_AnyType:$old, CIR_BoolType:$success);
@@ -4392,20 +4420,13 @@
                        UnitAttr:$is_volatile);
 
   let assemblyFormat = [{
-    `(`
-      $ptr `:` qualified(type($ptr)) `,`
-      $expected `:` type($expected) `,`
-      $desired `:` type($desired) `,`
-      `success` `=` $succ_order `,`
-      `failure` `=` $fail_order
-    `)`
-    (`align` `(` $alignment^ `)`)? (`weak` $weak^)?
+    `success` `(` $succ_order `)` `failure` `(` $fail_order `)`
+    $ptr `,` $expected `,` $desired
+    (`align` `(` $alignment^ `)`)? (`weak` $weak^)?
     (`volatile` $is_volatile^)?
- `:` `(` type($old) `,` type($success) `)` attr-dict + `:` functional-type(operands, results) attr-dict }]; - - let hasVerifier = 1; } #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index df82ca1..4fbae15 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -39,7 +39,6 @@ struct MissingFeatures { static bool opGlobalUsedOrCompilerUsed() { return false; } static bool opGlobalAnnotations() { return false; } static bool opGlobalCtorPriority() { return false; } - static bool opGlobalDtorList() { return false; } static bool setDSOLocal() { return false; } static bool setComdat() { return false; } @@ -175,6 +174,10 @@ struct MissingFeatures { static bool atomicScope() { return false; } static bool atomicSyncScopeID() { return false; } + // Global ctor handling + static bool globalCtorLexOrder() { return false; } + static bool globalCtorAssociatedData() { return false; } + // Misc static bool abiArgInfo() { return false; } static bool addHeapAllocSiteMetadata() { return false; } diff --git a/clang/include/clang/Sema/Template.h b/clang/include/clang/Sema/Template.h index 60c7d27..e963439b 100644 --- a/clang/include/clang/Sema/Template.h +++ b/clang/include/clang/Sema/Template.h @@ -205,8 +205,8 @@ enum class TemplateSubstitutionKind : char { /// Add a new outmost level to the multi-level template argument /// list. - /// A 'Final' substitution means that Subst* nodes won't be built - /// for the replacements. + /// A 'Final' substitution means that these Args don't need to be + /// resugared later. void addOuterTemplateArguments(Decl *AssociatedDecl, ArgList Args, bool Final) { assert(!NumRetainedOuterLevels && diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 84c5ecc..2d3cb6a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2587,6 +2587,82 @@ static bool interp__builtin_ia32_pmul( return true; } +static bool interp_builtin_horizontal_int_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + PrimType ElemT = *S.getContext().classify(VT->getElementType()); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + unsigned NumElts = VT->getNumElements(); + unsigned EltBits = S.getASTContext().getIntWidth(VT->getElementType()); + unsigned EltsPerLane = 128 / EltBits; + unsigned Lanes = NumElts * EltBits / 128; + unsigned DestIndex = 0; + + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + unsigned LaneStart = Lane * EltsPerLane; + for (unsigned I = 0; I < EltsPerLane; I += 2) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt(); + APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt(); + APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned); + Dst.elem<T>(DestIndex++) = static_cast<T>(ResL); + }); + } + + for (unsigned I = 0; I < EltsPerLane; I += 2) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt(); + APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt(); + APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned); + Dst.elem<T>(DestIndex++) = static_cast<T>(ResR); + }); + 
} + } + Dst.initializeAllElements(); + return true; +} + +static bool interp_builtin_horizontal_fp_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APFloat(const APFloat &, const APFloat &, + llvm::RoundingMode)> + Fn) { + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + + unsigned NumElts = VT->getNumElements(); + unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType()); + unsigned NumLanes = NumElts * EltBits / 128; + unsigned NumElemsPerLane = NumElts / NumLanes; + unsigned HalfElemsPerLane = NumElemsPerLane / 2; + + for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) { + using T = PrimConv<PT_Float>::T; + for (unsigned E = 0; E != HalfElemsPerLane; ++E) { + APFloat Elem1 = LHS.elem<T>(L + (2 * E) + 0).getAPFloat(); + APFloat Elem2 = LHS.elem<T>(L + (2 * E) + 1).getAPFloat(); + Dst.elem<T>(L + E) = static_cast<T>(Fn(Elem1, Elem2, RM)); + } + for (unsigned E = 0; E != HalfElemsPerLane; ++E) { + APFloat Elem1 = RHS.elem<T>(L + (2 * E) + 0).getAPFloat(); + APFloat Elem2 = RHS.elem<T>(L + (2 * E) + 1).getAPFloat(); + Dst.elem<T>(L + E + HalfElemsPerLane) = + static_cast<T>(Fn(Elem1, Elem2, RM)); + } + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const APFloat &, @@ -3665,6 +3741,53 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_elementwise_min: return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); }); + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; }); + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddpd256: + case clang::X86::BI__builtin_ia32_haddps256: + return interp_builtin_horizontal_fp_binop( + S, OpPC, Call, + [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { + APFloat F = LHS; + F.add(RHS, RM); + return F; + }); + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubpd256: + case clang::X86::BI__builtin_ia32_hsubps256: + return 
interp_builtin_horizontal_fp_binop( + S, OpPC, Call, + [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { + APFloat F = LHS; + F.subtract(RHS, RM); + return F; + }); + case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: case clang::X86::BI__builtin_ia32_pmuldq512: @@ -3695,6 +3818,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case X86::BI__builtin_ia32_vpmadd52luq128: + case X86::BI__builtin_ia32_vpmadd52luq256: + case X86::BI__builtin_ia32_vpmadd52luq512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) { + return A + (B.trunc(52) * C.trunc(52)).zext(64); + }); + case X86::BI__builtin_ia32_vpmadd52huq128: + case X86::BI__builtin_ia32_vpmadd52huq256: + case X86::BI__builtin_ia32_vpmadd52huq512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) { + return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64); + }); + case X86::BI__builtin_ia32_vpshldd128: case X86::BI__builtin_ia32_vpshldd256: case X86::BI__builtin_ia32_vpshldd512: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c734155..69cbf6e 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3316,6 +3316,10 @@ bool FunctionDecl::isImmediateEscalating() const { CD && CD->isInheritingConstructor()) return CD->getInheritedConstructor().getConstructor(); + // Destructors are not immediate escalating. + if (isa<CXXDestructorDecl>(this)) + return false; + // - a function that results from the instantiation of a templated entity // defined with the constexpr specifier. TemplatedKind TK = getTemplatedKind(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index dfdfef2..51c0382 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11974,6 +11974,54 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_vpmadd52luq128: + case X86::BI__builtin_ia32_vpmadd52luq256: + case X86::BI__builtin_ia32_vpmadd52luq512: { + APValue A, B, C; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B) || + !EvaluateAsRValue(Info, E->getArg(2), C)) + return false; + + unsigned ALen = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(ALen); + + for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) { + APInt AElt = A.getVectorElt(EltNum).getInt(); + APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52); + APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52); + APSInt ResElt(AElt + (BElt * CElt).zext(64), false); + ResultElements.push_back(APValue(ResElt)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_vpmadd52huq128: + case X86::BI__builtin_ia32_vpmadd52huq256: + case X86::BI__builtin_ia32_vpmadd52huq512: { + APValue A, B, C; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B) || + !EvaluateAsRValue(Info, E->getArg(2), C)) + return false; + + unsigned ALen = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(ALen); + + for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) { + APInt AElt = A.getVectorElt(EltNum).getInt(); + APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52); + APInt CElt = 
C.getVectorElt(EltNum).getInt().trunc(52); + APSInt ResElt(AElt + llvm::APIntOps::mulhu(BElt, CElt).zext(64), false); + ResultElements.push_back(APValue(ResElt)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: case clang::X86::BI__builtin_ia32_vprotqi: @@ -12381,6 +12429,169 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: + + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + + unsigned NumElts = SourceLHS.getVectorLength(); + unsigned EltBits = Info.Ctx.getIntWidth(DestEltTy); + unsigned EltsPerLane = 128 / EltBits; + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned LaneStart = 0; LaneStart != NumElts; + LaneStart += EltsPerLane) { + for (unsigned I = 0; I != EltsPerLane; I += 2) { + APSInt LHSA = SourceLHS.getVectorElt(LaneStart + I).getInt(); + APSInt LHSB = SourceLHS.getVectorElt(LaneStart + I + 1).getInt(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: { + APSInt Res(LHSA + LHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: { + APSInt Res(LHSA.sadd_sat(LHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: { + APSInt Res(LHSA - LHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APSInt Res(LHSA.ssub_sat(LHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + } + } + for (unsigned I = 0; I != EltsPerLane; I += 2) { + APSInt RHSA = SourceRHS.getVectorElt(LaneStart + I).getInt(); + APSInt RHSB = SourceRHS.getVectorElt(LaneStart + I + 1).getInt(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: { + APSInt Res(RHSA + RHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: { + APSInt Res(RHSA.sadd_sat(RHSB)); + 
ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: { + APSInt Res(RHSA - RHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APSInt Res(RHSA.ssub_sat(RHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + } + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElts = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElts); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned EltBits = Info.Ctx.getTypeSize(DestEltTy); + unsigned NumLanes = NumElts * EltBits / 128; + unsigned NumElemsPerLane = NumElts / NumLanes; + unsigned HalfElemsPerLane = NumElemsPerLane / 2; + + for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) { + for (unsigned I = 0; I != HalfElemsPerLane; ++I) { + APFloat LHSA = SourceLHS.getVectorElt(L + (2 * I) + 0).getFloat(); + APFloat LHSB = SourceLHS.getVectorElt(L + (2 * I) + 1).getFloat(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + LHSA.add(LHSB, RM); + break; + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: + LHSA.subtract(LHSB, RM); + break; + } + ResultElements.push_back(APValue(LHSA)); + } + for (unsigned I = 0; I != HalfElemsPerLane; ++I) { + APFloat RHSA = SourceRHS.getVectorElt(L + (2 * I) + 0).getFloat(); + APFloat RHSB = SourceRHS.getVectorElt(L + (2 * I) + 1).getFloat(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + RHSA.add(RHSB, RM); + break; + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: + RHSA.subtract(RHSB, RM); + break; + } + ResultElements.push_back(APValue(RHSA)); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index 17c6bec..142c932 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -509,7 +509,7 @@ OpenACCReductionClause 
*OpenACCReductionClause::Create(
     ArrayRef<OpenACCReductionRecipeWithStorage> Recipes, SourceLocation EndLoc) {
   size_t NumCombiners = llvm::accumulate(
-      Recipes, 0, [](size_t Num, const OpenACCReductionRecipe &R) {
+      Recipes, 0, [](size_t Num, const OpenACCReductionRecipeWithStorage &R) {
         return Num + R.CombinerRecipes.size();
       });
 
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 485308f..9b68de1 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -9,9 +9,11 @@
 #include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h"
 #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h"
 #include "clang/Analysis/Analyses/PostOrderCFGView.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/TimeProfiler.h"
 
 namespace clang::lifetimes::internal {
+using llvm::isa_and_present;
 
 static bool isGslPointerType(QualType QT) {
   if (const auto *RD = QT->getAsCXXRecordDecl()) {
@@ -108,7 +110,7 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) {
   // Specifically for conversion operators,
   // like `std::string_view p = std::string{};`
   if (isGslPointerType(MCE->getType()) &&
-      isa<CXXConversionDecl>(MCE->getCalleeDecl())) {
+      isa_and_present<CXXConversionDecl>(MCE->getCalleeDecl())) {
     // The argument is the implicit object itself.
     handleFunctionCall(MCE, MCE->getMethodDecl(),
                        {MCE->getImplicitObjectArgument()},
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 8ecbd3c..2dec26e 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -525,7 +525,8 @@ std::unique_ptr<WarningsSpecialCaseList>
 WarningsSpecialCaseList::create(const llvm::MemoryBuffer &Input,
                                 std::string &Err) {
   auto WarningSuppressionList = std::make_unique<WarningsSpecialCaseList>();
-  if (!WarningSuppressionList->createInternal(&Input, Err))
+  if (!WarningSuppressionList->createInternal(&Input, Err,
+                                              /*OrderBySize=*/true))
     return nullptr;
   return WarningSuppressionList;
 }
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 9e03a08..18641a9 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1568,6 +1568,7 @@ bool AArch64TargetInfo::validateAsmConstraint(
     if (const unsigned Len = matchAsmCCConstraint(Name)) {
       Name += Len - 1;
       Info.setAllowsRegister();
+      Info.setOutputOperandBounds(0, 2);
       return true;
     }
   }
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 13b8623..30f846c 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -99,6 +99,16 @@ bool SystemZTargetInfo::validateAsmConstraint(
   case 'T': // Likewise, plus an index
     Info.setAllowsMemory();
     return true;
+  case '@':
+    // CC condition changes.
+    if (StringRef(Name) == "@cc") {
+      Name += 2;
+      Info.setAllowsRegister();
+      // SystemZ has a 2-bit CC, hence the interval [0, 4).
+      Info.setOutputOperandBounds(0, 4);
+      return true;
+    }
+    return false;
   }
 }
 
@@ -161,6 +171,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
 
 void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
+  // Inline assembly supports SystemZ flag outputs.
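+  // Illustrative sketch (not part of this patch): once this macro is
+  // defined, portable code can detect flag-output support and request the
+  // condition code directly as an asm output, e.g.
+  //   int cc;
+  //   asm volatile("..." : "=@cc"(cc) : /* inputs */ : /* clobbers */);
+  // after which `cc` holds the 2-bit condition code, a value in [0, 4).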
+ Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__"); + Builder.defineMacro("__s390__"); Builder.defineMacro("__s390x__"); Builder.defineMacro("__zarch__"); diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index dc2185e..4e15d5a 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -136,6 +136,12 @@ public: std::string convertConstraint(const char *&Constraint) const override { switch (Constraint[0]) { + case '@': // Flag output operand. + if (llvm::StringRef(Constraint) == "@cc") { + Constraint += 2; + return std::string("{@cc}"); + } + break; case 'p': // Keep 'p' constraint. return std::string("p"); case 'Z': diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 6eb4db5..187815c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1516,6 +1516,7 @@ bool X86TargetInfo::validateAsmConstraint( if (auto Len = matchAsmCCConstraint(Name)) { Name += Len - 1; Info.setAllowsRegister(); + Info.setOutputOperandBounds(0, 2); return true; } return false; diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 0f4d6d2..a9983f8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -255,7 +255,7 @@ static void emitAtomicCmpXchg(CIRGenFunction &cgf, AtomicExpr *e, bool isWeak, mlir::Value expected = builder.createLoad(loc, val1); mlir::Value desired = builder.createLoad(loc, val2); - auto cmpxchg = cir::AtomicCmpXchg::create( + auto cmpxchg = cir::AtomicCmpXchgOp::create( builder, loc, expected.getType(), builder.getBoolTy(), ptr.getPointer(), expected, desired, cir::MemOrderAttr::get(&cgf.getMLIRContext(), successOrder), @@ -404,7 +404,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: - opName = cir::AtomicXchg::getOperationName(); + opName = cir::AtomicXchgOp::getOperationName(); break; case AtomicExpr::AO__opencl_atomic_init: diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index fe1ea56..82b1051 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -451,15 +451,47 @@ void CIRGenModule::emitGlobalFunctionDefinition(clang::GlobalDecl gd, setNonAliasAttributes(gd, funcOp); assert(!cir::MissingFeatures::opFuncAttributesForDefinition()); - if (funcDecl->getAttr<ConstructorAttr>()) - errorNYI(funcDecl->getSourceRange(), "constructor attribute"); - if (funcDecl->getAttr<DestructorAttr>()) - errorNYI(funcDecl->getSourceRange(), "destructor attribute"); + auto getPriority = [this](const auto *attr) -> int { + Expr *e = attr->getPriority(); + if (e) + return e->EvaluateKnownConstInt(this->getASTContext()).getExtValue(); + return attr->DefaultPriority; + }; + + if (const ConstructorAttr *ca = funcDecl->getAttr<ConstructorAttr>()) + addGlobalCtor(funcOp, getPriority(ca)); + if (const DestructorAttr *da = funcDecl->getAttr<DestructorAttr>()) + addGlobalDtor(funcOp, getPriority(da)); if (funcDecl->getAttr<AnnotateAttr>()) errorNYI(funcDecl->getSourceRange(), "deferredAnnotations"); } +/// Track functions to be called before main() runs. 
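+/// For example (illustrative): given
+///   __attribute__((constructor(200))) void init();
+/// this tags the cir.func with `global_ctor(200)`; the module-level ctor
+/// list itself is only materialized later, in LoweringPrepare.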
+void CIRGenModule::addGlobalCtor(cir::FuncOp ctor, + std::optional<int> priority) { + assert(!cir::MissingFeatures::globalCtorLexOrder()); + assert(!cir::MissingFeatures::globalCtorAssociatedData()); + + // Traditional LLVM codegen directly adds the function to the list of global + // ctors. In CIR we just add a global_ctor attribute to the function. The + // global list is created in LoweringPrepare. + // + // FIXME(from traditional LLVM): Type coercion of void()* types. + ctor.setGlobalCtorPriority(priority); +} + +/// Add a function to the list that will be called when the module is unloaded. +void CIRGenModule::addGlobalDtor(cir::FuncOp dtor, + std::optional<int> priority) { + if (codeGenOpts.RegisterGlobalDtorsWithAtExit && + (!getASTContext().getTargetInfo().getTriple().isOSAIX())) + errorNYI(dtor.getLoc(), "registerGlobalDtorsWithAtExit"); + + // FIXME(from traditional LLVM): Type coercion of void()* types. + dtor.setGlobalDtorPriority(priority); +} + void CIRGenModule::handleCXXStaticMemberVarInstantiation(VarDecl *vd) { VarDecl::DefinitionKind dk = vd->isThisDeclarationADefinition(); if (dk == VarDecl::Definition && vd->hasAttr<DLLImportAttr>()) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index f627bae..690f0ed 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -159,6 +159,13 @@ public: bool isConstant = false, mlir::Operation *insertPoint = nullptr); + /// Add a global constructor or destructor to the module. + /// The priority is optional, if not specified, the default priority is used. + void addGlobalCtor(cir::FuncOp ctor, + std::optional<int> priority = std::nullopt); + void addGlobalDtor(cir::FuncOp dtor, + std::optional<int> priority = std::nullopt); + bool shouldZeroInitPadding() const { // In C23 (N3096) $6.7.10: // """ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 3d86f71..ce4ae7e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -1005,7 +1005,7 @@ public: /*temporary=*/nullptr, OpenACCReductionOperator::Invalid, Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - privateOp); + privateOp, /*reductionCombinerRecipes=*/{}); // TODO: OpenACC: The dialect is going to change in the near future to // have these be on a different operation, so when that changes, we // probably need to change these here. 
@@ -1046,7 +1046,7 @@ public: OpenACCReductionOperator::Invalid, Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - firstPrivateOp); + firstPrivateOp, /*reductionCombinerRecipe=*/{}); // TODO: OpenACC: The dialect is going to change in the near future to // have these be on a different operation, so when that changes, we @@ -1088,7 +1088,7 @@ public: /*temporary=*/nullptr, clause.getReductionOp(), Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - reductionOp); + reductionOp, varRecipe.CombinerRecipes); operation.addReduction(builder.getContext(), reductionOp, recipe); } diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index 24a5fc2..ce14aa8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -527,16 +527,142 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( // doesn't restore it aftewards. void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - mlir::acc::ReductionRecipeOp recipe, size_t numBounds) { + mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes) { mlir::Block *block = createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc, numBounds, /*isInit=*/false); builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument lhsArg = block->getArgument(0); + mlir::Value lhsArg = block->getArgument(0); + mlir::Value rhsArg = block->getArgument(1); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); + + if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) { + cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated"); + mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); + return; + } + + // apply the bounds so that we can get our bounds emitted correctly. + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(lhsArg, rhsArg) = + createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false); + + // Emitter for when we know this isn't a struct or array we have to loop + // through. This should work for the 'field' once the get-element call has + // been made. + auto emitSingleCombiner = + [&](mlir::Value lhsArg, mlir::Value rhsArg, + const OpenACCReductionRecipe::CombinerRecipe &combiner) { + mlir::Type elementTy = + mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee(); + CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS}; + cgf.setAddrOfLocalVar( + combiner.LHS, Address{lhsArg, elementTy, + cgf.getContext().getDeclAlign(combiner.LHS)}); + CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS}; + cgf.setAddrOfLocalVar( + combiner.RHS, Address{rhsArg, elementTy, + cgf.getContext().getDeclAlign(combiner.RHS)}); + + [[maybe_unused]] mlir::LogicalResult stmtRes = + cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true); + }; + + // Emitter for when we know this is either a non-array or element of an array + // (which also shouldn't be an array type?). This function should generate the + // initialization code for an entire 'array-element'/non-array, including + // diving into each element of a struct (if necessary). 
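+  // (For a struct, the combiner recipes are assumed to line up one-to-one
+  // with the record's fields -- see the llvm::zip_equal below. Constant-size
+  // arrays are instead walked element-by-element by the loop built with
+  // builder.createFor further down.)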
+  auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) {
+    assert(!ty->isArrayType() && "Array type shouldn't get here");
+    if (const auto *rd = ty->getAsRecordDecl()) {
+      if (combinerRecipes.size() == 1 &&
+          cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) {
+        // If this is a 'top level' operator on the type we can just emit this
+        // as a simple one.
+        emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
+      } else {
+        // else we have to handle each individual field after a
+        // get-element.
+        for (const auto &[field, combiner] :
+             llvm::zip_equal(rd->fields(), combinerRecipes)) {
+          mlir::Type fieldType = cgf.convertType(field->getType());
+          auto fieldPtr = cir::PointerType::get(fieldType);
+
+          mlir::Value lhsField = builder.createGetMember(
+              loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex());
+          mlir::Value rhsField = builder.createGetMember(
+              loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex());
+
+          emitSingleCombiner(lhsField, rhsField, combiner);
+        }
+      }
+
+    } else {
+      // if this is a single-thing (because we should know this isn't an array,
+      // as Sema wouldn't let us get here), we can just do a normal emit call.
+      emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
+    }
+  };
+
+  if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) {
+    // If we're in an array, we have to emit the combiner for each element of
+    // the array.
+    auto itrTy = mlir::cast<cir::IntType>(cgf.PtrDiffTy);
+    auto itrPtrTy = cir::PointerType::get(itrTy);
+
+    mlir::Value zero =
+        builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), 0);
+    mlir::Value itr =
+        cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr",
+                              cgf.cgm.getSize(cgf.getPointerAlign()));
+    builder.CIRBaseBuilderTy::createStore(loc, zero, itr);
+
+    builder.setInsertionPointAfter(builder.createFor(
+        loc,
+        /*condBuilder=*/
+        [&](mlir::OpBuilder &b, mlir::Location loc) {
+          auto loadItr = cir::LoadOp::create(builder, loc, {itr});
+          mlir::Value arraySize = builder.getConstInt(
+              loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), cat->getZExtSize());
+          auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr,
+                                           arraySize);
+          builder.createCondition(cmp);
+        },
+        /*bodyBuilder=*/
+        [&](mlir::OpBuilder &b, mlir::Location loc) {
+          auto loadItr = cir::LoadOp::create(builder, loc, {itr});
+          auto lhsElt = builder.getArrayElement(
+              loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr,
+              /*shouldDecay=*/true);
+          auto rhsElt = builder.getArrayElement(
+              loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr,
+              /*shouldDecay=*/true);
+
+          emitCombiner(lhsElt, rhsElt, cat->getElementType());
+          builder.createYield(loc);
+        },
+        /*stepBuilder=*/
+        [&](mlir::OpBuilder &b, mlir::Location loc) {
+          auto loadItr = cir::LoadOp::create(builder, loc, {itr});
+          auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(),
+                                          cir::UnaryOpKind::Inc, loadItr);
+          builder.CIRBaseBuilderTy::createStore(loc, inc, itr);
+          builder.createYield(loc);
+        }));
 
-  mlir::acc::YieldOp::create(builder, locEnd, lhsArg);
+  } else if (origType->isArrayType()) {
+    cgf.cgm.errorNYI(loc,
+                     "OpenACC Reduction combiner non-constant array recipe");
+  } else {
+    emitCombiner(lhsArg, rhsArg, origType);
+  }
+
+  builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
+  mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
 }
 
 } // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h index a5da744..745d424 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h @@ -64,10 +64,10 @@ protected: // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but // doesn't restore it aftewards. - void createReductionRecipeCombiner(mlir::Location loc, mlir::Location locEnd, - mlir::Value mainOp, - mlir::acc::ReductionRecipeOp recipe, - size_t numBounds); + void createReductionRecipeCombiner( + mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, + mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes); void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, @@ -169,7 +169,9 @@ public: const Expr *varRef, const VarDecl *varRecipe, const VarDecl *temporary, OpenACCReductionOperator reductionOp, DeclContext *dc, QualType origType, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, QualType baseType, - mlir::Value mainOp) { + mlir::Value mainOp, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> + reductionCombinerRecipes) { assert(!varRecipe->getType()->isSpecificBuiltinType( BuiltinType::ArraySection) && "array section shouldn't make it to recipe creation"); @@ -208,7 +210,8 @@ public: createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, origType, /*emitInitExpr=*/true); - createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds); + createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds, + origType, reductionCombinerRecipes); } else { static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>); createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 5f88590..7af3dc1 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -15,6 +15,7 @@ #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" #include "mlir/Support/LLVM.h" @@ -1720,6 +1721,43 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { hasAlias = true; } + auto parseGlobalDtorCtor = + [&](StringRef keyword, + llvm::function_ref<void(std::optional<int> prio)> createAttr) + -> mlir::LogicalResult { + if (mlir::succeeded(parser.parseOptionalKeyword(keyword))) { + std::optional<int> priority; + if (mlir::succeeded(parser.parseOptionalLParen())) { + auto parsedPriority = mlir::FieldParser<int>::parse(parser); + if (mlir::failed(parsedPriority)) + return parser.emitError(parser.getCurrentLocation(), + "failed to parse 'priority', of type 'int'"); + priority = parsedPriority.value_or(int()); + // Parse literal ')' + if (parser.parseRParen()) + return failure(); + } + createAttr(priority); + } + return success(); + }; + + if (parseGlobalDtorCtor("global_ctor", [&](std::optional<int> priority) { + mlir::IntegerAttr globalCtorPriorityAttr = + builder.getI32IntegerAttr(priority.value_or(65535)); + state.addAttribute(getGlobalCtorPriorityAttrName(state.name), + globalCtorPriorityAttr); + }).failed()) + return failure(); + + if 
(parseGlobalDtorCtor("global_dtor", [&](std::optional<int> priority) { + mlir::IntegerAttr globalDtorPriorityAttr = + builder.getI32IntegerAttr(priority.value_or(65535)); + state.addAttribute(getGlobalDtorPriorityAttrName(state.name), + globalDtorPriorityAttr); + }).failed()) + return failure(); + // Parse the optional function body. auto *body = state.addRegion(); OptionalParseResult parseResult = parser.parseOptionalRegion( @@ -1801,6 +1839,18 @@ void cir::FuncOp::print(OpAsmPrinter &p) { p << ")"; } + if (auto globalCtorPriority = getGlobalCtorPriority()) { + p << " global_ctor"; + if (globalCtorPriority.value() != 65535) + p << "(" << globalCtorPriority.value() << ")"; + } + + if (auto globalDtorPriority = getGlobalDtorPriority()) { + p << " global_dtor"; + if (globalDtorPriority.value() != 65535) + p << "(" << globalDtorPriority.value() << ")"; + } + // Print the body if this is not an external function. Region &body = getOperation()->getRegion(0); if (!body.empty()) { @@ -2851,20 +2901,6 @@ mlir::LogicalResult cir::ThrowOp::verify() { } //===----------------------------------------------------------------------===// -// AtomicCmpXchg -//===----------------------------------------------------------------------===// - -LogicalResult cir::AtomicCmpXchg::verify() { - mlir::Type pointeeType = getPtr().getType().getPointee(); - - if (pointeeType != getExpected().getType() || - pointeeType != getDesired().getType()) - return emitOpError("ptr, expected and desired types must match"); - - return success(); -} - -//===----------------------------------------------------------------------===// // TypeInfoAttr //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index dbff0b9..d99c362 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -105,6 +105,8 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { /// List of ctors and their priorities to be called before main() llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalCtorList; + /// List of dtors and their priorities to be called when unloading module. + llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalDtorList; void setASTContext(clang::ASTContext *c) { astCtx = c; @@ -823,10 +825,13 @@ void LoweringPreparePass::buildGlobalCtorDtorList() { mlir::ArrayAttr::get(&getContext(), globalCtors)); } - // We will eventual need to populate a global_dtor list, but that's not - // needed for globals with destructors. It will only be needed for functions - // that are marked as global destructors with an attribute. 
- assert(!cir::MissingFeatures::opGlobalDtorList()); + if (!globalDtorList.empty()) { + llvm::SmallVector<mlir::Attribute> globalDtors = + prepareCtorDtorAttrList<cir::GlobalDtorAttr>(&getContext(), + globalDtorList); + mlirModule->setAttr(cir::CIRDialect::getGlobalDtorsAttrName(), + mlir::ArrayAttr::get(&getContext(), globalDtors)); + } } void LoweringPreparePass::buildCXXGlobalInitFunc() { @@ -975,22 +980,28 @@ void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) { } void LoweringPreparePass::runOnOp(mlir::Operation *op) { - if (auto arrayCtor = dyn_cast<ArrayCtor>(op)) + if (auto arrayCtor = dyn_cast<cir::ArrayCtor>(op)) { lowerArrayCtor(arrayCtor); - else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) + } else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) { lowerArrayDtor(arrayDtor); - else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) + } else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) { lowerCastOp(cast); - else if (auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) + } else if (auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) { lowerComplexDivOp(complexDiv); - else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) + } else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) { lowerComplexMulOp(complexMul); - else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) + } else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) { lowerGlobalOp(glob); - else if (auto dynamicCast = mlir::dyn_cast<cir::DynamicCastOp>(op)) + } else if (auto dynamicCast = mlir::dyn_cast<cir::DynamicCastOp>(op)) { lowerDynamicCastOp(dynamicCast); - else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) + } else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) { lowerUnaryOp(unary); + } else if (auto fnOp = dyn_cast<cir::FuncOp>(op)) { + if (auto globalCtor = fnOp.getGlobalCtorPriority()) + globalCtorList.emplace_back(fnOp.getName(), globalCtor.value()); + else if (auto globalDtor = fnOp.getGlobalDtorPriority()) + globalDtorList.emplace_back(fnOp.getName(), globalDtor.value()); + } } void LoweringPreparePass::runOnOperation() { @@ -1003,7 +1014,7 @@ void LoweringPreparePass::runOnOperation() { op->walk([&](mlir::Operation *op) { if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, cir::ComplexMulOp, cir::ComplexDivOp, cir::DynamicCastOp, - cir::GlobalOp, cir::UnaryOp>(op)) + cir::FuncOp, cir::GlobalOp, cir::UnaryOp>(op)) opsToTransform.push_back(op); }); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 26e0ba9..3abba3d 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -694,8 +694,8 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) { llvm_unreachable("unknown memory order"); } -mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite( - cir::AtomicCmpXchg op, OpAdaptor adaptor, +mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite( + cir::AtomicCmpXchgOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { mlir::Value expected = adaptor.getExpected(); mlir::Value desired = adaptor.getDesired(); @@ -719,8 +719,8 @@ mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite( return mlir::success(); } -mlir::LogicalResult CIRToLLVMAtomicXchgLowering::matchAndRewrite( - cir::AtomicXchg op, OpAdaptor adaptor, +mlir::LogicalResult CIRToLLVMAtomicXchgOpLowering::matchAndRewrite( + cir::AtomicXchgOp op, OpAdaptor adaptor, 
mlir::ConversionPatternRewriter &rewriter) const { assert(!cir::MissingFeatures::atomicSyncScopeID()); mlir::LLVM::AtomicOrdering llvmOrder = getLLVMMemOrder(adaptor.getMemOrder()); @@ -2598,7 +2598,13 @@ void ConvertCIRToLLVMPass::runOnOperation() { return std::make_pair(ctorAttr.getName(), ctorAttr.getPriority()); }); - assert(!cir::MissingFeatures::opGlobalDtorList()); + // Emit the llvm.global_dtors array. + buildCtorDtorList(module, cir::CIRDialect::getGlobalDtorsAttrName(), + "llvm.global_dtors", [](mlir::Attribute attr) { + auto dtorAttr = mlir::cast<cir::GlobalDtorAttr>(attr); + return std::make_pair(dtorAttr.getName(), + dtorAttr.getPriority()); + }); } mlir::LogicalResult CIRToLLVMBrOpLowering::matchAndRewrite( diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 92636f2..fdc1a11 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2674,7 +2674,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, const llvm::ArrayRef<LValue> ResultRegDests, const llvm::ArrayRef<QualType> ResultRegQualTys, const llvm::BitVector &ResultTypeRequiresCast, - const llvm::BitVector &ResultRegIsFlagReg) { + const std::vector<std::optional<std::pair<unsigned, unsigned>>> + &ResultBounds) { CGBuilderTy &Builder = CGF.Builder; CodeGenModule &CGM = CGF.CGM; llvm::LLVMContext &CTX = CGF.getLLVMContext(); @@ -2685,18 +2686,20 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, // ResultRegDests can be also populated by addReturnRegisterOutputs() above, // in which case its size may grow. assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); - assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); + assert(ResultBounds.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; llvm::Type *TruncTy = ResultTruncRegTypes[i]; - if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { - // Target must guarantee the Value `Tmp` here is lowered to a boolean - // value. - llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2); + if ((i < ResultBounds.size()) && ResultBounds[i].has_value()) { + const auto [LowerBound, UpperBound] = ResultBounds[i].value(); + // FIXME: Support for nonzero lower bounds not yet implemented. + assert(LowerBound == 0 && "Output operand lower bound is not zero."); + llvm::Constant *UpperBoundConst = + llvm::ConstantInt::get(Tmp->getType(), UpperBound); llvm::Value *IsBooleanValue = - Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two); + Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, UpperBoundConst); llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); Builder.CreateCall(FnAssume, IsBooleanValue); } @@ -2825,7 +2828,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Type *> ArgElemTypes; std::vector<llvm::Value*> Args; llvm::BitVector ResultTypeRequiresCast; - llvm::BitVector ResultRegIsFlagReg; + std::vector<std::optional<std::pair<unsigned, unsigned>>> ResultBounds; // Keep track of inout constraints. 
std::string InOutConstraints; @@ -2883,8 +2886,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultRegQualTys.push_back(QTy); ResultRegDests.push_back(Dest); - bool IsFlagReg = llvm::StringRef(OutputConstraint).starts_with("{@cc"); - ResultRegIsFlagReg.push_back(IsFlagReg); + ResultBounds.emplace_back(Info.getOutputOperandBounds()); llvm::Type *Ty = ConvertTypeForMem(QTy); const bool RequiresCast = Info.allowsRegister() && @@ -3231,7 +3233,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast, - ResultRegIsFlagReg); + ResultBounds); // If this is an asm goto with outputs, repeat EmitAsmStores, but with a // different insertion point; one for each indirect destination and with @@ -3242,7 +3244,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Builder.SetInsertPoint(Succ, --(Succ->end())); EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, - ResultTypeRequiresCast, ResultRegIsFlagReg); + ResultTypeRequiresCast, ResultBounds); } } } diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index 0ef39b6..0d0941e 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -680,22 +680,22 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, if (const auto *ED = Ty->getAsEnumDecl()) Ty = ED->getIntegerType(); - // All integral types are promoted to XLen width - if (Size < XLen && Ty->isIntegralOrEnumerationType()) { - return extendType(Ty, CGT.ConvertType(Ty)); - } - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() < XLen) + + if (XLen == 64 && EIT->getNumBits() == 32) return extendType(Ty, CGT.ConvertType(Ty)); - if (EIT->getNumBits() > 128 || - (!getContext().getTargetInfo().hasInt128Type() && - EIT->getNumBits() > 64)) - return getNaturalAlignIndirect( - Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + + if (EIT->getNumBits() <= 2 * XLen) + return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)); + return getNaturalAlignIndirect( + Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), + /*ByVal=*/false); } + // All integral types are promoted to XLen width + if (Size < XLen && Ty->isIntegralOrEnumerationType()) + return extendType(Ty, CGT.ConvertType(Ty)); + return ABIArgInfo::getDirect(); } diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index c2956a1..cb3fc1c 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -41,8 +41,7 @@ static constexpr std::array<StringRef, 14> QtPropertyKeywords = { bool FormatToken::isQtProperty() const { assert(llvm::is_sorted(QtPropertyKeywords)); - return std::binary_search(QtPropertyKeywords.begin(), - QtPropertyKeywords.end(), TokenText); + return llvm::binary_search(QtPropertyKeywords, TokenText); } // Sorted common C++ non-keyword types. diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 4aaca2d..e150aa6 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -834,10 +834,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit @@ -866,10 +865,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit @@ -901,10 +899,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadds_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadds_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -937,10 +934,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit @@ -969,10 +965,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -1005,10 +1000,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a diff --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h index f01b322..625a8ff 100644 --- a/clang/lib/Headers/avx512ifmaintrin.h +++ b/clang/lib/Headers/avx512ifmaintrin.h @@ -15,54 +15,53 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ + __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__X, 
__Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512ifmavlintrin.h b/clang/lib/Headers/avx512ifmavlintrin.h index a72b561..c4449c7 100644 --- a/clang/lib/Headers/avx512ifmavlintrin.h +++ b/clang/lib/Headers/avx512ifmavlintrin.h @@ -8,13 +8,24 @@ *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H -#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." +#error \ + "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __IFMAVLINTRIN_H #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,avx512vl"), \ @@ -24,6 +35,8 @@ __target__("avx512ifma,avx512vl"), \ __min_vector_width__(256))) +#endif + #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) @@ -41,70 +54,57 @@ (__v4di)(Z))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return 
(__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h index 5c782d2a..a2ef601 100644 --- a/clang/lib/Headers/avxifmaintrin.h +++ b/clang/lib/Headers/avxifmaintrin.h @@ -15,12 +15,21 @@ #define __AVXIFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(256))) +#endif // must vex-encoding diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 123fa79..696ec31 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -694,9 +694,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b) /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. 
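The avx512ifma/avxifma header changes above all apply one pattern: when compiling as C++11 or later, constexpr is folded into the shared attribute macro so these intrinsics become usable in constant expressions, while C translation units keep the old attribute set. A hedged usage sketch (assumes an AVX-512 IFMA target and that _mm512_setzero_si512 is also constexpr-enabled, as in recent Clang headers):

#include <immintrin.h>

// In a C++11+ translation unit the 52-bit multiply-add can now fold at
// compile time; in C the same call remains runtime-only.
constexpr __m512i Zero = _mm512_setzero_si512();
constexpr __m512i R = _mm512_madd52lo_epu64(Zero, Zero, Zero); // all-zero result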
-static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hadd_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } @@ -717,9 +716,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hadd_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } @@ -740,9 +738,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } @@ -763,9 +760,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index f0c9b2b..42bd343 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -83,9 +83,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hadd_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } @@ -106,9 +105,8 @@ _mm_hadd_ps(__m128 __a, __m128 __b) /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } @@ -168,9 +166,8 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_addsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } @@ -191,9 +188,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b) /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. 
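Among the pmmintrin functions made constexpr above, _mm_addsub_pd is the easiest to misread: it subtracts in the even lane and adds in the odd lane. A small worked example with illustrative values:

#include <pmmintrin.h>

void addsub_demo(void) {
  __m128d a = _mm_set_pd(4.0, 3.0); // lanes: {3.0, 4.0}
  __m128d b = _mm_set_pd(2.0, 1.0); // lanes: {1.0, 2.0}
  __m128d r = _mm_addsub_pd(a, b);  // {3.0 - 1.0, 4.0 + 2.0} == {2.0, 6.0}
  (void)r;
}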
-static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hadd_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } @@ -214,9 +210,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b) /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 3fc9f98..9d007c8 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -202,10 +202,9 @@ _mm_abs_epi32(__m128i __a) { /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -225,10 +224,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -248,11 +246,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -272,11 +269,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -299,10 +295,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. 
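One detail worth calling out in the tmmintrin hunks: the 64-bit _mm_hadd_pi16/_mm_hadd_pi32 forms have no dedicated builtin; they splice the two __m64 arguments into a single 128-bit vector with __builtin_shufflevector, run the 128-bit horizontal op, and keep the low half via __trunc64. A hedged sketch of the same splice/truncate trick in plain Clang vector extensions (the type and function names here are mine, not the headers'):

typedef short v4hi __attribute__((vector_size(8)));  // four i16 lanes, like __m64
typedef short v8hi __attribute__((vector_size(16))); // eight i16 lanes

// Glue a and b into one 128-bit vector: lanes a0..a3 followed by b0..b3.
static v8hi concat64(v4hi a, v4hi b) {
  return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7);
}

// Keep only the low 64 bits, which is what __trunc64 amounts to.
static v4hi low64(v8hi v) {
  return __builtin_shufflevector(v, v, 0, 1, 2, 3);
}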
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadds_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadds_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -325,11 +320,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadds_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -349,10 +343,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -372,10 +365,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -395,11 +387,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -419,11 +410,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. 
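Since the horizontal adds above are now constant-evaluable, a worked example of the lane arithmetic may help: the low half of the result holds pairwise sums from the first operand and the high half pairwise sums from the second (_mm_hadds_epi16 additionally saturates each sum to the int16 range). Illustrative values:

#include <immintrin.h>

void hadd_demo(void) {
  __m128i a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);         // lanes 0..7 = 1..8
  __m128i b = _mm_set_epi16(80, 70, 60, 50, 40, 30, 20, 10); // lanes 0..7 = 10..80
  __m128i s = _mm_hadd_epi16(a, b);
  // s == {1+2, 3+4, 5+6, 7+8, 10+20, 30+40, 50+60, 70+80}
  //   == {3, 7, 11, 15, 30, 70, 110, 150}
  (void)s;
}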
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -446,10 +436,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -472,11 +461,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -556,10 +544,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) { /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, + __m128i __b) { + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 7c44efd..87dd682 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -606,15 +606,15 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( Constraint.mappingOccurenceList(); // The empty MLTAL situation should only occur when evaluating non-dependent // constraints. 
- if (!MLTAL.getNumSubstitutedLevels()) - MLTAL.addOuterTemplateArguments(TD, {}, /*Final=*/false); - SubstitutedOuterMost = - llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); + if (MLTAL.getNumSubstitutedLevels()) + SubstitutedOuterMost = + llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); unsigned Offset = 0; for (unsigned I = 0, MappedIndex = 0; I < Used.size(); I++) { TemplateArgument Arg; if (Used[I]) - Arg = CTAI.SugaredConverted[MappedIndex++]; + Arg = S.Context.getCanonicalTemplateArgument( + CTAI.SugaredConverted[MappedIndex++]); if (I < SubstitutedOuterMost.size()) { SubstitutedOuterMost[I] = Arg; Offset = I + 1; @@ -626,8 +626,10 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( if (Offset < SubstitutedOuterMost.size()) SubstitutedOuterMost.erase(SubstitutedOuterMost.begin() + Offset); - MLTAL.replaceOutermostTemplateArguments(TD, SubstitutedOuterMost); - return std::move(MLTAL); + MultiLevelTemplateArgumentList SubstitutedTemplateArgs; + SubstitutedTemplateArgs.addOuterTemplateArguments(TD, SubstitutedOuterMost, + /*Final=*/false); + return std::move(SubstitutedTemplateArgs); } ExprResult ConstraintSatisfactionChecker::EvaluateSlow( diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp index ead9781..17078e8 100644 --- a/clang/lib/Sema/SemaOpenACCClause.cpp +++ b/clang/lib/Sema/SemaOpenACCClause.cpp @@ -1924,7 +1924,7 @@ bool SemaOpenACC::CheckReductionVarType(Expr *VarExpr) { // off here. This will result in CurType being the actual 'type' of the // expression, which is what we are looking to check. QualType CurType = isa<ArraySectionExpr>(VarExpr) - ? ArraySectionExpr::getBaseOriginalType(VarExpr) + ? cast<ArraySectionExpr>(VarExpr)->getElementType() : VarExpr->getType(); // This can happen when we have a dependent type in an array element that the diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 85e3d20..73fd33a 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5727,7 +5727,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, Function->setDeclarationNameLoc(NameLocPointsToPattern()); EnterExpressionEvaluationContextForFunction EvalContext( - *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Function); Qualifiers ThisTypeQuals; CXXRecordDecl *ThisContext = nullptr; diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 0ae784c..1444114 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -251,6 +251,8 @@ public: const Expr *Ex, const MemRegion *MR, bool hypothetical); + static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR); + SVal getCStringLength(CheckerContext &C, ProgramStateRef &state, const Expr *Ex, @@ -983,6 +985,21 @@ SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, return strLength; } +const StringLiteral * +CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) { + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + return cast<StringRegion>(MR)->getStringLiteral(); + case MemRegion::NonParamVarRegionKind: + if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl(); + Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) + return 
dyn_cast_or_null<StringLiteral>(Decl->getInit()); + return nullptr; + default: + return nullptr; + } +} + SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, const Expr *Ex, SVal Buf, bool hypothetical) const { @@ -1013,30 +1030,19 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, // its length. For anything we can't figure out, just return UnknownVal. MR = MR->StripCasts(); - switch (MR->getKind()) { - case MemRegion::StringRegionKind: { - // Modifying the contents of string regions is undefined [C99 6.4.5p6], - // so we can assume that the byte length is the correct C string length. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType sizeTy = svalBuilder.getContext().getSizeType(); - const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); - return svalBuilder.makeIntVal(strLit->getLength(), sizeTy); - } - case MemRegion::NonParamVarRegionKind: { + if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) { // If we have a global constant with a string literal initializer, // compute the initializer's length. - const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl(); - if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) { - if (const Expr *Init = Decl->getInit()) { - if (auto *StrLit = dyn_cast<StringLiteral>(Init)) { - SValBuilder &SvalBuilder = C.getSValBuilder(); - QualType SizeTy = SvalBuilder.getContext().getSizeType(); - return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy); - } - } - } - [[fallthrough]]; + // Modifying the contents of string regions is undefined [C99 6.4.5p6], + // so we can assume that the byte length is the correct C string length. + // FIXME: Embedded null characters are not handled. + SValBuilder &SVB = C.getSValBuilder(); + return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType()); } + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + case MemRegion::NonParamVarRegionKind: case MemRegion::SymbolicRegionKind: case MemRegion::AllocaRegionKind: case MemRegion::ParamVarRegionKind: @@ -1046,10 +1052,28 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, case MemRegion::CompoundLiteralRegionKind: // FIXME: Can we track this? Is it necessary? return UnknownVal(); - case MemRegion::ElementRegionKind: - // FIXME: How can we handle this? It's not good enough to subtract the - // offset from the base string length; consider "123\x00567" and &a[5]. + case MemRegion::ElementRegionKind: { + // If an offset into the string literal is used, use the original length + // minus the offset. + // FIXME: Embedded null characters are not handled. + const ElementRegion *ER = cast<ElementRegion>(MR); + const SubRegion *SuperReg = + cast<SubRegion>(ER->getSuperRegion()->StripCasts()); + const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg); + if (!StrLit) + return UnknownVal(); + SValBuilder &SVB = C.getSValBuilder(); + NonLoc Idx = ER->getIndex(); + QualType SizeTy = SVB.getContext().getSizeType(); + NonLoc LengthVal = + SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>(); + if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal, + SVB.getConditionType()) + .castAs<DefinedOrUnknownSVal>(), + true)) + return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy); return UnknownVal(); + } default: // Other regions (mostly non-data) can't have a reliable C string length. // In this case, an error is emitted and UndefinedVal is returned. 
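The new ElementRegionKind case above lets the analyzer model strlen of an offset into a string literal: for a literal of length L and an index i the state can prove satisfies i <= L, the modeled length is L - i; anything else stays UnknownVal. In test form (a hedged sketch in the style of the analyzer tests below; the declarations mimic the test harness):

void clang_analyzer_eval(bool);
extern "C" unsigned long strlen(const char *);

void element_region_length() {
  const char *s = "abcd";                  // string literal, length 4
  clang_analyzer_eval(strlen(s + 1) == 3); // 4 - 1 -> TRUE
  clang_analyzer_eval(strlen(s + 4) == 0); // offset == length -> TRUE
  // A merely range-constrained symbolic offset still evaluates to UNKNOWN;
  // see the FIXME in strlen_symbolic_offset in the tests below.
}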
@@ -1074,6 +1098,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, ProgramStateRef &state, const Expr *expr, SVal val) const { + // FIXME: use getStringLiteralFromRegion (and remove unused parameters)? // Get the memory region pointed to by the val. const MemRegion *bufRegion = val.getAsRegion(); diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 84a9c43..6108931 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -1111,6 +1111,10 @@ SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, assert(!BinaryOperator::isComparisonOp(op) && "arguments to comparison ops must be of the same type"); + SVal simplifiedRhs = simplifySVal(state, rhs); + if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) + rhs = *simplifiedRhsAsNonLoc; + // Special case: rhs is a zero constant. if (rhs.isZeroConstant()) return lhs; diff --git a/clang/test/Analysis/loc-folding.cpp b/clang/test/Analysis/loc-folding.cpp new file mode 100644 index 0000000..1fcb066 --- /dev/null +++ b/clang/test/Analysis/loc-folding.cpp @@ -0,0 +1,61 @@ +// RUN: %clang_analyze_cc1 -verify %s -analyzer-config eagerly-assume=false \ +// RUN: -analyzer-checker=core,debug.ExprInspection + +void clang_analyzer_eval(bool); + +void element_constant() { + char arr[10]; + clang_analyzer_eval(arr + 1 > arr); // expected-warning{{TRUE}} +} + +void element_known() { + char arr[10]; + int off = 1; + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} +} + +void element_constrained(int off) { + char arr[10]; + if (off == 1) { + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} + } +} + +void element_unknown(int off) { + char arr[10]; + clang_analyzer_eval(arr + off > arr); // expected-warning{{UNKNOWN}} +} + +void element_complex(int off) { + char arr[10]; + int comp = off * 2; + if (off == 1) { + clang_analyzer_eval(arr + comp > arr); // expected-warning{{TRUE}} + } +} + +void base_constant(int *arr) { + clang_analyzer_eval(arr + 1 > arr); // expected-warning{{TRUE}} +} + +void base_known(int *arr) { + int off = 1; + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} +} + +void base_constrained(int *arr, int off) { + if (off == 1) { + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} + } +} + +void base_unknown(int *arr, int off) { + clang_analyzer_eval(arr + off > arr); // expected-warning{{UNKNOWN}} +} + +void base_complex(int *arr, int off) { + int comp = off * 2; + if (off == 1) { + clang_analyzer_eval(arr + comp > arr); // expected-warning{{TRUE}} + } +} diff --git a/clang/test/Analysis/string.c b/clang/test/Analysis/string.c index cdd3627..9d24583 100644 --- a/clang/test/Analysis/string.c +++ b/clang/test/Analysis/string.c @@ -82,16 +82,21 @@ size_t strlen(const char *s); void strlen_constant0(void) { clang_analyzer_eval(strlen("123") == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(&("123"[1])) == 2); // expected-warning{{TRUE}} } void strlen_constant1(void) { const char *a = "123"; clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(a + 3) == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(a + 4)); // expected-warning{{UNKNOWN}} } void strlen_constant2(char x) { char a[] = "123";
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{UNKNOWN}} a[0] = x; clang_analyzer_eval(strlen(a) == 3); // expected-warning{{UNKNOWN}} @@ -105,10 +110,12 @@ char global_non_const_arr[] = "op"; void strlen_global_constant_ptr(void) { clang_analyzer_eval(strlen(global_str_ptr) == 4); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(global_str_ptr + 1) == 3); // expected-warning{{TRUE}} } void strlen_global_constant_arr(void) { clang_analyzer_eval(strlen(global_str_arr) == 4); // expected-warning{{TRUE}} + clang_analyzer_eval(strlen(global_str_arr + 1) == 3); // expected-warning{{TRUE}} } void strlen_global_non_const_ptr(void) { @@ -235,6 +242,17 @@ void testStrlenCallee(void) { clang_analyzer_eval(lenBefore == lenAfter); // expected-warning{{UNKNOWN}} } +void strlen_symbolic_offset(unsigned x) { + const char *str = "abcd"; + if (x < 1 || x > 3) + return; + // FIXME: these should be TRUE + clang_analyzer_eval(strlen(str + x) >= 1); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(strlen(str + x) <= 3); // expected-warning{{UNKNOWN}} + if (x != 1) + return; + clang_analyzer_eval(strlen(str + x) == 3); // expected-warning{{TRUE}} +} //===----------------------------------------------------------------------=== // strnlen() @@ -244,32 +262,38 @@ size_t strnlen(const char *s, size_t maxlen); void strnlen_constant0(void) { clang_analyzer_eval(strnlen("123", 10) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strnlen(&("123"[1]), 10) == 2); // expected-warning{{TRUE}} } void strnlen_constant1(void) { const char *a = "123"; clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{TRUE}} } void strnlen_constant2(char x) { char a[] = "123"; clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{UNKNOWN}} a[0] = x; clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{UNKNOWN}} } void strnlen_constant4(void) { clang_analyzer_eval(strnlen("123456", 3) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strnlen(&("123456"[1]), 3) == 3); // expected-warning{{TRUE}} } void strnlen_constant5(void) { const char *a = "123456"; clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{TRUE}} } void strnlen_constant6(char x) { char a[] = "123456"; clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{UNKNOWN}} a[0] = x; clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{UNKNOWN}} } @@ -326,6 +350,19 @@ void strnlen_at_actual(size_t limit) { } } +void strnlen_at_actual_1(size_t limit) { + const char *str = "abc"; + size_t len = strnlen(str + 1, limit); + clang_analyzer_eval(len <= 2); // expected-warning{{TRUE}} + // This is due to eager assertion in strnlen. 
+ if (limit == 0) { + clang_analyzer_eval(len == 0); // expected-warning{{TRUE}} + } else { + clang_analyzer_eval(len == 2); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(len < 2); // expected-warning{{UNKNOWN}} + } +} + //===----------------------------------------------------------------------=== // strcpy() //===----------------------------------------------------------------------=== diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c index 76289c5..440010a 100644 --- a/clang/test/CIR/CodeGen/atomic.c +++ b/clang/test/CIR/CodeGen/atomic.c @@ -211,7 +211,7 @@ void c11_atomic_cmpxchg_strong(_Atomic(int) *ptr, int *expected, int desired) { __c11_atomic_compare_exchange_strong(ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -249,7 +249,7 @@ void c11_atomic_cmpxchg_weak(_Atomic(int) *ptr, int *expected, int desired) { __c11_atomic_compare_exchange_weak(ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) weak : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -286,7 +286,7 @@ void atomic_cmpxchg(int *ptr, int *expected, int *desired) { // OGCG-LABEL: @atomic_cmpxchg __atomic_compare_exchange(ptr, expected, desired, /*weak=*/0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -317,7 +317,7 @@ void atomic_cmpxchg(int *ptr, int *expected, int *desired) { // OGCG-NEXT: store i8 %[[SUCCESS_2]], ptr %{{.+}}, align 1 __atomic_compare_exchange(ptr, expected, desired, /*weak=*/1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) weak : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = 
cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -354,7 +354,7 @@ void atomic_cmpxchg_n(int *ptr, int *expected, int desired) { // OGCG-LABEL: @atomic_cmpxchg_n __atomic_compare_exchange_n(ptr, expected, desired, /*weak=*/0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -385,7 +385,7 @@ void atomic_cmpxchg_n(int *ptr, int *expected, int desired) { // OGCG-NEXT: store i8 %[[SUCCESS_2]], ptr %{{.+}}, align 1 __atomic_compare_exchange_n(ptr, expected, desired, /*weak=*/1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) weak : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -427,12 +427,12 @@ void c11_atomic_exchange(_Atomic(int) *ptr, int value) { __c11_atomic_exchange(ptr, value, __ATOMIC_RELEASE); __c11_atomic_exchange(ptr, value, __ATOMIC_ACQ_REL); __c11_atomic_exchange(ptr, value, __ATOMIC_SEQ_CST); - // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} monotonic, align 4 // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} acquire, align 4 @@ -460,12 +460,12 @@ void atomic_exchange(int *ptr, int *value, int *old) { __atomic_exchange(ptr, value, old, __ATOMIC_RELEASE); __atomic_exchange(ptr, value, old, __ATOMIC_ACQ_REL); __atomic_exchange(ptr, value, old, __ATOMIC_SEQ_CST); - // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, 
%{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} monotonic, align 4 // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} acquire, align 4 @@ -493,12 +493,12 @@ void atomic_exchange_n(int *ptr, int value) { __atomic_exchange_n(ptr, value, __ATOMIC_RELEASE); __atomic_exchange_n(ptr, value, __ATOMIC_ACQ_REL); __atomic_exchange_n(ptr, value, __ATOMIC_SEQ_CST); - // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} monotonic, align 4 // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} acquire, align 4 diff --git a/clang/test/CIR/CodeGen/global-ctor-dtor.cpp b/clang/test/CIR/CodeGen/global-ctor-dtor.cpp new file mode 100644 index 0000000..2e03ff3 --- /dev/null +++ b/clang/test/CIR/CodeGen/global-ctor-dtor.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t-before.cir +// RUN: FileCheck --input-file=%t-before.cir %s --check-prefix=CIR-BEFORE-LPP +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR-AFTER +// RUN: FileCheck --check-prefix=CIR-AFTER --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll 
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +extern int bar(); +void foo(void) __attribute__((constructor)); +void foo(void) { + bar(); +} + +// CIR-BEFORE-LPP: cir.func dso_local @_Z3foov() global_ctor + +void foo2(void) __attribute__((constructor(777))); +void foo2(void) { + bar(); +} + +// CIR-BEFORE-LPP: cir.func dso_local @_Z4foo2v() global_ctor(777) + +void foo3(void) __attribute__((destructor)); +void foo3(void) { + bar(); +} + +// CIR-BEFORE-LPP: cir.func dso_local @_Z4foo3v() global_dtor + +void foo4(void) __attribute__((destructor(789))); +void foo4(void) { + bar(); +} + +// CIR-BEFORE-LPP: cir.func dso_local @_Z4foo4v() global_dtor(789) + +// CIR-AFTER: module @{{.*}} attributes {cir.global_ctors = [#cir.global_ctor<"_Z3foov", 65535>, #cir.global_ctor<"_Z4foo2v", 777>], cir.global_dtors = [#cir.global_dtor<"_Z4foo3v", 65535>, #cir.global_dtor<"_Z4foo4v", 789>] + +// LLVM: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z3foov, ptr null }, { i32, ptr, ptr } { i32 777, ptr @_Z4foo2v, ptr null }] +// LLVM: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z4foo3v, ptr null }, { i32, ptr, ptr } { i32 789, ptr @_Z4foo4v, ptr null }] + +// OGCG: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z3foov, ptr null }, { i32, ptr, ptr } { i32 777, ptr @_Z4foo2v, ptr null }] +// OGCG: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z4foo3v, ptr null }, { i32, ptr, ptr } { i32 789, ptr @_Z4foo4v, ptr null }] diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp index 040ddd3..ee4fffe 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct DefaultOperators { int i; @@ -43,12 +43,43 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: 
%[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(*:someVar) - // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -71,10 +102,42 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> 
!cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 
0; i < 5; ++i); #pragma acc parallel loop reduction(max:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -101,7 +164,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(min:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -128,7 +191,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&:someVarNoFloats) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) @@ -146,7 +209,27 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// 
CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -167,7 +250,27 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -188,10 +291,30 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -218,7 +341,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(||:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -245,7 +368,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(+:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_add__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init { @@ -286,10 +409,65 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, 
%[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] 
{name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(*:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -388,11 +566,67 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: 
%[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(max:someVarArr) + // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] @@ -493,7 +727,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(min:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -664,10 +898,53 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(|:someVarArrNoFloats) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) @@ -702,7 +979,50 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: 
cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : 
!s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -739,10 +1059,53 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = 
cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -844,9 +1207,9 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(||:someVarArr) -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] @@ -888,7 +1251,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(+:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init { @@ -935,9 +1298,67 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// 
CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : 
!cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(*:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -983,9 +1404,67 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: 
%[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> 
!s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(max:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1033,7 +1512,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(min:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_min__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1081,7 +1560,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1121,9 +1600,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: 
%[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, 
!cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(|:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1163,9 +1688,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member 
%[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(^:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1205,9 +1776,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// 
CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // 
CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1255,7 +1872,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(||:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1303,26 +1920,26 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(+:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(*:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(max:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(min:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&:someVarArrNoFloats[1:1]) for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(|:someVarArrNoFloats[1:1]) for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(^:someVarArrNoFloats[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(||:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); // CHECK-NEXT: cir.func {{.*}}@_Z12acc_combined } diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp index 6e885cc..472e4ac 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s template<typename T> void acc_combined() { T someVar; @@ -13,7 +13,10 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: 
Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -26,7 +29,10 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } @@ -111,7 +117,31 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -142,7 +172,31 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 
5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -302,6 +356,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = 
cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -337,6 +420,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> 
// CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -372,6 +484,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -407,6 +520,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -442,6 +556,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -477,6 +592,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp index 3d46ac7..112ff656 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct HasOperatorsInline { int i; @@ -48,7 +48,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -79,7 +79,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call 
@_ZN18HasOperatorsInlinemLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -88,7 +88,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(max:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init { +// CHECK: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -172,7 +172,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -181,7 +181,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(|:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init { +// CHECK: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -203,7 +203,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -212,7 +212,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(^:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> 
init { +// CHECK: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -234,7 +234,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -243,7 +243,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init { +// CHECK: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -344,7 +344,29 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call 
@_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -466,7 +488,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -832,7 +875,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = 
cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -896,7 +960,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> 
!cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -959,7 +1044,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -1216,6 +1322,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope 
{ +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1293,6 +1425,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { 
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1524,6 +1682,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], 
%[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1601,6 +1785,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1678,6 +1888,32 @@ void 
acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp index e9669d3..7eaa822b 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s template<typename T> void acc_combined() { @@ -14,7 +14,10 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, 
%[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -28,7 +31,10 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -67,7 +73,10 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -81,7 +90,10 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -95,7 +107,10 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } @@ -155,7 +170,31 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = 
cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -186,7 +225,31 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -279,7 +342,31 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -309,7 +396,31 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// 
CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -339,7 +450,31 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -437,6 +572,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) 
-> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -472,6 +636,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// 
CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -507,6 +700,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -542,6 +736,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -577,6 +772,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], 
%[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -612,6 +836,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -647,6 +900,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: 
!acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -682,6 +964,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -717,6 +1000,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp index c99e3c1..c2c0c77 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu 
-Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct HasOperatorsOutline { int i; unsigned u; @@ -47,7 +47,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -55,7 +55,6 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } for(int i=0;i < 5; ++i); - #pragma acc parallel loop reduction(*:someVar) // CHECK: acc.reduction.recipe @reduction_mul__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) @@ -78,17 +77,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { - for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(max:someVar) // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): // CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasOperatorsOutline>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } -// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init { + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(max:someVar) +// CHECK: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -172,7 +171,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -181,7 +180,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(|:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init { +// CHECK: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -203,7 +202,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -212,7 +211,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(^:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init { +// CHECK: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -234,7 +233,7 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -243,7 +242,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { +// CHECK: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { // CHECK-NEXT: 
^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -344,7 +343,29 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -466,7 +487,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: 
cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -832,7 +874,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = 
cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -896,7 +959,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -959,7 +1043,28 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// 
CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -982,6 +1087,7 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init { @@ -1106,8 +1212,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(||:someVarArr) - -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"] @@ -1216,6 +1321,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: 
%[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1293,6 +1424,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = 
cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1524,6 +1681,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// 
CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1601,6 +1784,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1678,6 +1887,32 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, 
%[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c index c99dc09..b439623 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -std=c23 -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -std=c23 -triple x86_64-linux-pc %s -o - | FileCheck %s struct DefaultOperators { int i; @@ -42,7 +42,39 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} 
: !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; 
@@ -69,7 +101,39 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, 
%[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -144,7 +208,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -165,7 +249,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = 
cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -186,7 +290,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, 
!cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -271,7 +395,62 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> 
!cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -373,7 +552,62 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] 
{alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : 
!cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -650,7 +884,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> 
!cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -680,7 +957,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// 
CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -710,7 +1030,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: 
%[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], 
%[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -892,6 +1255,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store 
{{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -940,6 +1361,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: 
%[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : 
!cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1056,7 +1535,6 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i - // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> @@ -1079,6 +1557,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: 
%[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1121,6 +1645,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int 
%[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1163,6 +1733,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> 
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp index b90a2fc..f997902 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct DefaultOperators { int i; @@ -21,7 +21,6 @@ void acc_compute() { struct DefaultOperatorsNoFloats someVarNoFloats; struct DefaultOperatorsNoFloats someVarArrNoFloats[5]; #pragma acc parallel reduction(+:someVar) - ; // CHECK: acc.reduction.recipe @reduction_add__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -44,11 +43,43 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting 
combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, 
!cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } + ; #pragma acc parallel reduction(*:someVar) - // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -71,7 +102,39 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name 
= "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -146,7 +209,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -167,7 +250,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -188,7 +291,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -286,7 +409,62 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : 
!cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// 
CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -388,11 +566,67 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: 
cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; #pragma acc parallel reduction(max:someVarArr) + // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] @@ -664,7 +898,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, 
!s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -702,7 +979,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = 
cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -739,7 +1059,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// 
CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -846,7 +1209,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(||:someVarArr) -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] @@ -935,6 +1298,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: 
%[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: 
%[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -983,6 +1404,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} 
%[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1121,6 +1600,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: 
%[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1163,6 +1688,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], 
%[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1205,6 +1776,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: 
%[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1323,9 +1940,11 @@ void acc_compute() { ; #pragma acc parallel reduction(||:someVarArr[1:1]) ; + // CHECK-NEXT: cir.func {{.*}}@_Z11acc_compute } void uses() { acc_compute<DefaultOperators>(); } + diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c 
b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c index 0f7fd84..3e4583f 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s void acc_compute() { float someVar; @@ -13,7 +13,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -26,7 +29,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -110,7 +116,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, 
!cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -141,7 +171,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -301,6 +355,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] 
{alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -336,6 +419,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: 
%[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -371,6 +483,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -406,6 +519,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -441,6 +555,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -476,6 +591,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp index 4d99a43..833cfad 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s template<typename T> void acc_compute() { @@ -14,7 +14,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load 
{{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; @@ -27,7 +30,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } @@ -112,7 +118,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -143,7 +173,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 
: i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -303,6 +357,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> 
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -338,6 +421,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -373,6 +485,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, 
%[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -443,6 +556,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; @@ -478,6 +592,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp index ea00c07..ec4372d 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct HasOperatorsInline { int i; @@ -48,7 +48,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -79,7 +79,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -88,7 +88,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(max:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init { +// CHECK: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init { // CHECK-NEXT: 
^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -172,7 +172,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -181,7 +181,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(|:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init { +// CHECK: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -203,7 +203,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -212,7 +212,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(^:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init { +// CHECK: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -234,7 +234,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, 
!cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -243,7 +243,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init { +// CHECK: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -344,7 +344,29 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}): @@ -466,7 +488,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -832,7 +875,28 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -896,7 +960,28 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -959,7 +1044,28 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -1216,6 +1322,32 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1293,6 +1425,32 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1524,6 +1682,32 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1601,6 +1785,32 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: }
combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1678,6 +1888,32 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c
index b170ed0bf..0cee5c6 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 
 void acc_compute() {
 int someVar;
@@ -13,7 +13,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 ;
@@ -27,7 +30,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 ;
@@ -66,7 +72,10 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 ;
@@ -80,7 +89,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 ;
@@ -94,9 +106,13 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
+
 ;
 #pragma acc parallel reduction(&&:someVar)
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSi : !cir.ptr<!s32i> reduction_operator <land> init {
@@ -153,7 +169,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -184,7 +224,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -277,7 +341,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -307,7 +395,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT:
acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -337,7 +449,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -435,6 +571,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -470,6 +635,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -505,6 +699,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -540,6 +735,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -575,6 +771,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -610,6 +835,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -645,6 +899,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -680,6 +963,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -715,6 +999,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 ;
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp
index c678eae..822dd9f6 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 
 template<typename T>
 void acc_compute() {
@@ -14,7 +17,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -67,7 +73,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -81,7 +90,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -95,7 +107,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } @@ -155,7 +170,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] 
= cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -186,7 +225,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -279,7 +342,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, 
%[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -309,7 +396,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 
5>> // CHECK-NEXT: } ; @@ -339,7 +450,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -437,6 +572,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], 
%[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -472,6 +636,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -507,6 +700,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -542,6 +736,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -577,6 +772,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -612,6 +836,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -647,6 +900,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : 
(!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -682,6 +964,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -717,6 +1000,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp index 9ccaea2..873bf51 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct HasOperatorsOutline { int i; unsigned u; @@ -24,7 +24,7 @@ template<typename T> void acc_compute() { T someVar; T someVarArr[5]; -#pragma acc parallel reduction(+:someVar) +#pragma acc parallel reduction(+:someVar) // CHECK: acc.reduction.recipe @reduction_add__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -47,7 +47,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, 
!cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -55,8 +55,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; - -#pragma acc parallel reduction(*:someVar) +#pragma acc parallel reduction(*:someVar) // CHECK: acc.reduction.recipe @reduction_mul__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -78,17 +77,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { - ; -#pragma acc parallel reduction(max:someVar) // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): // CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasOperatorsOutline>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } -// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init { + ; +#pragma acc parallel reduction(max:someVar) +// CHECK: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -118,7 +117,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(min:someVar) +#pragma acc parallel reduction(min:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -149,7 +148,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVar) +#pragma acc parallel reduction(&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -172,7 +171,7 @@ void acc_compute() { // // 
CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -180,8 +179,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init { +#pragma acc parallel reduction(|:someVar) +// CHECK: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -203,7 +202,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -211,8 +210,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init { +#pragma acc parallel reduction(^:someVar) +// CHECK: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -234,7 +233,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: 
!cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -242,8 +241,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&&:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { +#pragma acc parallel reduction(&&:someVar) +// CHECK: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -273,7 +272,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(||:someVar) +#pragma acc parallel reduction(||:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -305,7 +304,7 @@ void acc_compute() { // CHECK-NEXT: } ; -#pragma acc parallel reduction(+:someVarArr) +#pragma acc parallel reduction(+:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_add__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -344,7 +343,29 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : 
(!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -368,7 +389,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(*:someVarArr) +#pragma acc parallel reduction(*:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -466,7 +487,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, 
!cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -490,7 +532,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(max:someVarArr) +#pragma acc parallel reduction(max:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -612,7 +654,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(min:someVarArr) +#pragma acc parallel reduction(min:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -734,7 +776,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArr) +#pragma acc parallel reduction(&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -832,7 +874,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> 
-> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -856,7 +919,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArr) +#pragma acc parallel reduction(|:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -896,7 +959,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// 
CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -920,7 +1004,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArr) +#pragma acc parallel reduction(^:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -959,7 +1043,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -982,8 +1087,9 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: 
acc.yield // CHECK-NEXT: } + ; -#pragma acc parallel reduction(&&:someVarArr) +#pragma acc parallel reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -1105,9 +1211,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(||:someVarArr) - -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { +#pragma acc parallel reduction(||:someVarArr) +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"] @@ -1171,7 +1276,7 @@ void acc_compute() { // CHECK-NEXT: } ; -#pragma acc parallel reduction(+:someVarArr[2]) +#pragma acc parallel reduction(+:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1216,6 +1321,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride 
%[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1248,7 +1379,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(*:someVarArr[2]) +#pragma acc parallel reduction(*:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1293,6 +1424,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: 
%[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1325,7 +1482,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(max:someVarArr[2]) +#pragma acc parallel reduction(max:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1402,7 +1559,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(min:someVarArr[2]) +#pragma acc parallel reduction(min:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_min__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1479,7 +1636,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArr[2]) +#pragma acc parallel reduction(&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1524,6 +1681,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] 
: (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1556,7 +1739,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArr[2]) +#pragma acc parallel reduction(|:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1601,6 +1784,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: 
%[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1633,7 +1842,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArr[2]) +#pragma acc parallel reduction(^:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1678,6 +1887,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1710,7 +1945,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&&:someVarArr[2]) +#pragma acc parallel reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1787,7 +2022,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(||:someVarArr[2]) +#pragma acc parallel reduction(||:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c index 783aa9a..b2d1362 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s void acc_compute() { unsigned int someVar; @@ -13,7 +13,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -27,7 +30,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -66,7 +72,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -80,7 +89,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -94,7 +106,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i> +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT:
cir.store {{.*}} %[[XOR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i> // CHECK-NEXT: } ; @@ -153,7 +168,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -184,7 +223,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: 
%[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -277,7 +340,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -307,7 +394,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load 
%[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -337,7 +448,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], 
%[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -435,6 +570,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -470,6 +634,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : 
!cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -505,6 +698,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -540,6 +734,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -575,6 +770,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: 
%[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -610,6 +834,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// 
CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -645,6 +898,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -680,6 +962,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; @@ -715,6 +998,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp 
b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp index 038afcaa..349e0fb 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct DefaultOperators { int i; @@ -15,7 +15,7 @@ struct DefaultOperatorsNoFloats { }; template<typename T> -void acc_combined() { +void acc_loop() { T someVar; T someVarArr[5]; struct DefaultOperatorsNoFloats someVarNoFloats; @@ -43,12 +43,43 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : 
!cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(*:someVar) - // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -71,10 +102,42 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : 
!cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(max:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -101,7 +164,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(min:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -128,7 +191,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&:someVarNoFloats) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) @@ -146,7 +209,27 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// 
CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -167,7 +250,27 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> 
!cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -188,10 +291,30 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} 
%[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -218,7 +341,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -245,7 +368,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(+:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_add__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init { @@ -286,10 +409,65 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member 
%[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] 
= cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(*:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -388,11 +566,67 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : 
!cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(max:someVarArr) + // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] @@ -493,7 +727,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(min:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -664,10 +898,53 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = 
cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(|:someVarArrNoFloats) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) @@ -702,7 +979,50 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) 
: !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -739,10 +1059,53 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -844,9 +1207,9 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; 
++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVarArr) -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] @@ -888,7 +1251,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(+:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init { @@ -935,9 +1298,67 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : 
!cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(*:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -983,9 +1404,67 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], 
%[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(max:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1033,7 +1512,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // 
CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(min:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_min__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1081,7 +1560,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1121,9 +1600,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: 
%[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(|:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1163,9 +1688,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment 
= 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = 
cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(^:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1205,9 +1776,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1255,7 +1872,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1303,29 +1920,29 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(+:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop 
reduction(*:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(max:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(min:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&:someVarArrNoFloats[1:1]) for(int i = 0; i < 5; ++i); #pragma acc loop reduction(|:someVarArrNoFloats[1:1]) for(int i = 0; i < 5; ++i); #pragma acc loop reduction(^:someVarArrNoFloats[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVarArr[1:1]) - for(int i=0;i < 5; ++i); - // CHECK-NEXT: cir.func {{.*}}@_Z12acc_combined + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: cir.func {{.*}}@_Z8acc_loop } void uses() { - acc_combined<DefaultOperators>(); + acc_loop<DefaultOperators>(); } diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp index 11b7c35..8d9269b 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s template<typename T> void acc_loop() { @@ -14,7 +14,10 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -27,7 +30,10 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } @@ -112,7 +118,31 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// 
CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -143,7 +173,31 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : 
!cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -303,6 +357,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -338,6 +421,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -373,6 +485,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -408,6 +521,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -443,6 +557,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -478,6 +593,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp
index d95da8c..1c89515 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct HasOperatorsInline {
int i;
@@ -48,7 +48,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -79,7 +79,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -88,7 +88,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(max:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init {
+// CHECK: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -172,7 +172,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -181,7 +181,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(|:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init {
+// CHECK: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -203,7 +203,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -212,7 +212,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(^:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init {
+// CHECK: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -234,7 +234,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -243,7 +243,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(&&:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
+// CHECK: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -344,7 +344,29 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -466,7 +488,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -832,7 +875,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -896,7 +960,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -959,7 +1044,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -1216,6 +1322,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1293,6 +1425,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1524,6 +1682,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1601,6 +1785,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1678,6 +1888,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
index d207e07b..72e9d1f 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
template<typename T>
void acc_loop() {
@@ -14,7 +14,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -28,7 +31,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -67,7 +73,10 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -81,7 +90,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -95,7 +107,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
@@ -155,7 +170,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -186,7 +225,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -279,7 +342,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -309,7 +396,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -339,7 +450,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -437,6 +572,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -472,6 +636,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -507,6 +700,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -542,6 +736,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -577,6 +772,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -612,6 +836,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -647,6 +900,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -682,6 +964,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -717,6 +1000,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
index a33c25a..a36d41c1 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct HasOperatorsOutline {
int i;
unsigned u;
@@ -47,7 +47,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -55,7 +55,6 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
-
#pragma acc loop reduction(*:someVar)
// CHECK: acc.reduction.recipe @reduction_mul__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <mul> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
@@ -78,17 +77,17 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
//
// CHECK-NEXT: } combiner {
- for(int i=0;i < 5; ++i);
-#pragma acc loop reduction(max:someVar)
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasOperatorsOutline>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
-// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init {
+ for(int i=0;i < 5; ++i);
+#pragma acc loop reduction(max:someVar)
+// CHECK: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -172,7 +171,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -181,7 +180,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(|:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init {
+// CHECK: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -203,7 +202,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -212,7 +211,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(^:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init {
+// CHECK: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init {
//
CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -234,7 +233,7 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -243,7 +242,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { +// CHECK: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -344,7 +343,29 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> 
!cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -466,7 +487,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -832,7 +874,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, 
!cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -896,7 +959,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call 
@_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -959,7 +1043,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -982,6 +1087,7 @@ void acc_loop() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init { @@ -1106,8 +1212,7 @@ void acc_loop() { // 
CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(||:someVarArr) - -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"] @@ -1216,6 +1321,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: 
!acc.data_bounds_ty{{.*}}): @@ -1293,6 +1424,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1524,6 +1681,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, 
["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1601,6 +1784,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: 
%[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1678,6 +1887,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// 
CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp b/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp index fc696ff..20ad7a3 100644 --- a/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp +++ b/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s // Note: unlike the 'private' recipe checks, this is just for spot-checking, // so this test isn't as comprehensive. The same code paths are used for @@ -90,7 +90,88 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[LHS_TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, !u64i) -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: %[[RHS_TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: 
%[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, !u64i) -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHS_BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.array<!rec_NoOps x 5>>, !u64i) -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHS_BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.array<!rec_NoOps x 5>>, !u64i) -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHS_BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_BOUND1_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND2_STRIDE_DECAY]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHS_BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_BOUND1_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND2_STRIDE_DECAY]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_GET_I:.*]] = cir.get_member 
%[[LHS_BOUND1_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_GET_I:.*]] = cir.get_member %[[RHS_BOUND1_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OP:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw +// CHECK-NEXT: cir.store{{.*}} %[[OP]], %[[LHS_GET_I]] : !s32i +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT:} destroy { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): @@ -355,7 +436,89 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> + +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = 
cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_TLA_LOAD:.*]] = cir.load %[[LHSARG]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[LHS_TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !u64i) -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[RHS_TLA_LOAD:.*]] = cir.load %[[RHSARG]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !u64i) -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE_LOAD:.*]] = cir.load %[[LHS_BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.ptr<!rec_NoOps>>, !u64i) -> !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE_LOAD:.*]] = cir.load %[[RHS_BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.ptr<!rec_NoOps>>, !u64i) -> !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE_LOAD:.*]] = cir.load %[[LHS_BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND2_STRIDE_LOAD]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> 
!cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE_LOAD:.*]] = cir.load %[[RHS_BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND2_STRIDE_LOAD]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_GET_I:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_GET_I:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OP:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw +// CHECK-NEXT: cir.store{{.*}} %[[OP]], %[[LHS_GET_I]] : !s32i +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): diff --git a/clang/test/CIR/IR/atomic.cir b/clang/test/CIR/IR/atomic.cir index 8520763..790297f 100644 --- a/clang/test/CIR/IR/atomic.cir +++ b/clang/test/CIR/IR/atomic.cir @@ -5,17 +5,30 @@ cir.func @atomic_xchg(%ptr: !cir.ptr<!s32i>, %val: !s32i) { // CHECK-LABEL: @atomic_xchg - %0 = cir.atomic.xchg relaxed %ptr, %val : !cir.ptr<!s32i> -> !s32i - // CHECK: cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - %1 = cir.atomic.xchg consume %ptr, %val : !cir.ptr<!s32i> -> !s32i - // CHECK: cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - %2 = cir.atomic.xchg acquire %ptr, %val : !cir.ptr<!s32i> -> !s32i - // CHECK: cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - %3 = cir.atomic.xchg release %ptr, %val : !cir.ptr<!s32i> -> !s32i - // CHECK: cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - %4 = cir.atomic.xchg acq_rel %ptr, %val : !cir.ptr<!s32i> -> !s32i - // CHECK: cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - %5 = cir.atomic.xchg seq_cst %ptr, %val : !cir.ptr<!s32i> -> !s32i - // CHECK: cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + %0 = cir.atomic.xchg relaxed %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CHECK: cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : 
(!cir.ptr<!s32i>, !s32i) -> !s32i + %1 = cir.atomic.xchg consume %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CHECK: cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + %2 = cir.atomic.xchg acquire %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CHECK: cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + %3 = cir.atomic.xchg release %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CHECK: cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + %4 = cir.atomic.xchg acq_rel %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CHECK: cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + %5 = cir.atomic.xchg seq_cst %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CHECK: cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + cir.return +} + +cir.func @atomic_cmpxchg(%ptr: !cir.ptr<!s32i>, %expected: !s32i, %desired: !s32i) { + // CHECK-LABEL: @atomic_cmpxchg + %0, %1 = cir.atomic.cmpxchg success(relaxed) failure(relaxed) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg success(relaxed) failure(relaxed) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + %2, %3 = cir.atomic.cmpxchg weak success(relaxed) failure(relaxed) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg weak success(relaxed) failure(relaxed) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + %4, %5 = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + %6, %7 = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) cir.return } diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir index d7e8184..6e91898 100644 --- a/clang/test/CIR/IR/func.cir +++ b/clang/test/CIR/IR/func.cir @@ -110,4 +110,36 @@ cir.func builtin @builtin() { } // CHECK: cir.func{{.*}} builtin @builtin() +cir.func @global_ctor_func() global_ctor { + cir.return +} + +// CHECK: cir.func @global_ctor_func() global_ctor { +// CHECK: cir.return +// CHECK: } + +cir.func @global_ctor_with_priority() global_ctor(200) { + cir.return +} + +// CHECK: cir.func @global_ctor_with_priority() global_ctor(200) { +// CHECK: cir.return +// CHECK: } + +cir.func @global_dtor_func() global_dtor { + cir.return +} + +// CHECK: cir.func @global_dtor_func() global_dtor { +// CHECK: cir.return +// CHECK: } + +cir.func @global_dtor_with_priority() global_dtor(201) { + cir.return +} + +// CHECK: cir.func @global_dtor_with_priority() global_dtor(201) { +// CHECK: cir.return +// CHECK: } + } diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp index 5f1243a..af2dce8 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp @@ -149,7 +149,7 @@ namespace std_example { template<typename T> constexpr 
bool is_same_v<T, T> = true; template<typename T, typename U> concept same_as = is_same_v<T, U>; - // expected-note@-1 {{because 'is_same_v<int, typename T2::inner>' evaluated to false}} + // expected-note@-1 {{because 'is_same_v<int, typename std_example::T2::inner>' evaluated to false}} static_assert(C1<int>); static_assert(C1<int*>); @@ -160,7 +160,7 @@ namespace std_example { template<typename T> concept C2 = requires(T x) { {*x} -> same_as<typename T::inner>; - // expected-note@-1{{because 'same_as<int, typename T2::inner>' evaluated to false}} + // expected-note@-1{{because 'same_as<int, typename std_example::T2::inner>' evaluated to false}} // expected-note@-2{{because '*x' would be invalid: indirection requires pointer operand ('int' invalid)}} }; diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp index 9fc4906..70a96be 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp @@ -27,7 +27,7 @@ using r4i = X<void>::r4<int>; // expected-error{{constraints not satisfied for c // C++ [expr.prim.req.nested] Examples namespace std_example { - template<typename U> concept C1 = sizeof(U) == 1; // expected-note{{because 'sizeof(decltype(+t)) == 1' (4 == 1) evaluated to false}} + template<typename U> concept C1 = sizeof(U) == 1; // expected-note{{because 'sizeof(int) == 1' (4 == 1) evaluated to false}} template<typename T> concept D = requires (T t) { requires C1<decltype (+t)>; // expected-note{{because 'decltype(+t)' (aka 'int') does not satisfy 'C1'}} diff --git a/clang/test/CXX/temp/temp.param/p10-2a.cpp b/clang/test/CXX/temp/temp.param/p10-2a.cpp index 4f192d3..c0406f8 100644 --- a/clang/test/CXX/temp/temp.param/p10-2a.cpp +++ b/clang/test/CXX/temp/temp.param/p10-2a.cpp @@ -95,8 +95,8 @@ concept OneOf = (is_same_v<T, Ts> || ...); // #OneOf // expected-note@#OneOf 3{{because 'is_same_v<int, char[1]>' evaluated to false}} // expected-note@#OneOf 3{{and 'is_same_v<int, char[2]>' evaluated to false}} // expected-note@#OneOf {{because 'is_same_v<decltype(nullptr), char>' evaluated to false}} -// expected-note@#OneOf {{because 'is_same_v<decltype(nullptr), char>' evaluated to false}} -// expected-note@#OneOf {{and 'is_same_v<decltype(nullptr), int>' evaluated to false}} +// expected-note@#OneOf {{because 'is_same_v<std::nullptr_t, char>' evaluated to false}} +// expected-note@#OneOf {{and 'is_same_v<std::nullptr_t, int>' evaluated to false}} // expected-note@#OneOf {{and 'is_same_v<decltype(nullptr), int>' evaluated to false}} template<OneOf<char[1], char[2]> T, OneOf<int, long, char> U> diff --git a/clang/test/CodeGen/RISCV/bitint.c b/clang/test/CodeGen/RISCV/bitint.c new file mode 100644 index 0000000..1ad43af --- /dev/null +++ b/clang/test/CodeGen/RISCV/bitint.c @@ -0,0 +1,342 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature +// RUN: %clang_cc1 -triple riscv64 -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=RISCV64 +// RUN: %clang_cc1 -triple riscv32 -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=RISCV32 +// RUN: %clang_cc1 -triple riscv32 -fforce-enable-int128 -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=RISCV32_INT128 + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_17_add_unsigned +// RISCV64-SAME: (i17 noundef zeroext [[A:%.*]], i17 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// 
RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add i17 [[B]], [[A]] +// RISCV64-NEXT: ret i17 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_17_add_unsigned +// RISCV32-SAME: (i17 noundef zeroext [[A:%.*]], i17 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[ADD:%.*]] = add i17 [[B]], [[A]] +// RISCV32-NEXT: ret i17 [[ADD]] +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_17_add_unsigned +// RISCV32_INT128-SAME: (i17 noundef zeroext [[A:%.*]], i17 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i17 [[B]], [[A]] +// RISCV32_INT128-NEXT: ret i17 [[ADD]] +// +unsigned _BitInt(17) test_bitint_17_add_unsigned(unsigned _BitInt(17) a, unsigned _BitInt(17) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_17_add_signed +// RISCV64-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]] +// RISCV64-NEXT: ret i17 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_17_add_signed +// RISCV32-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]] +// RISCV32-NEXT: ret i17 [[ADD]] +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_17_add_signed +// RISCV32_INT128-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]] +// RISCV32_INT128-NEXT: ret i17 [[ADD]] +// +signed _BitInt(17) test_bitint_17_add_signed(signed _BitInt(17) a, signed _BitInt(17) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_17_add_default +// RISCV64-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]] +// RISCV64-NEXT: ret i17 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_17_add_default +// RISCV32-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]] +// RISCV32-NEXT: ret i17 [[ADD]] +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_17_add_default +// RISCV32_INT128-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]] +// RISCV32_INT128-NEXT: ret i17 [[ADD]] +// +_BitInt(17) test_bitint_17_add_default(_BitInt(17) a, _BitInt(17) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_32_add_unsigned +// RISCV64-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add i32 [[B]], [[A]] +// RISCV64-NEXT: ret i32 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_32_add_unsigned +// RISCV32-SAME: (i32 noundef zeroext [[A:%.*]], i32 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[ADD:%.*]] = add i32 [[B]], [[A]] +// RISCV32-NEXT: ret i32 [[ADD]] +// +// RISCV32_INT128-LABEL: define 
{{[^@]+}}@test_bitint_32_add_unsigned +// RISCV32_INT128-SAME: (i32 noundef zeroext [[A:%.*]], i32 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i32 [[B]], [[A]] +// RISCV32_INT128-NEXT: ret i32 [[ADD]] +// +unsigned _BitInt(32) test_bitint_32_add_unsigned(unsigned _BitInt(32) a, unsigned _BitInt(32) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_32_add_signed +// RISCV64-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]] +// RISCV64-NEXT: ret i32 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_32_add_signed +// RISCV32-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]] +// RISCV32-NEXT: ret i32 [[ADD]] +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_32_add_signed +// RISCV32_INT128-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]] +// RISCV32_INT128-NEXT: ret i32 [[ADD]] +// +signed _BitInt(32) test_bitint_32_add_signed(signed _BitInt(32) a, signed _BitInt(32) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_32_add_default +// RISCV64-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]] +// RISCV64-NEXT: ret i32 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_32_add_default +// RISCV32-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]] +// RISCV32-NEXT: ret i32 [[ADD]] +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_32_add_default +// RISCV32_INT128-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]] +// RISCV32_INT128-NEXT: ret i32 [[ADD]] +// +_BitInt(32) test_bitint_32_add_default(_BitInt(32) a, _BitInt(32) b) { + return a + b; +} + + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_65_add_unsigned +// RISCV64-SAME: (i65 noundef zeroext [[A:%.*]], i65 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add i65 [[B]], [[A]] +// RISCV64-NEXT: ret i65 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_65_add_unsigned +// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6:![0-9]+]] +// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65 +// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65 +// RISCV32-NEXT: [[ADD:%.*]] = add i65 [[B]], [[A]] +// RISCV32-NEXT: [[STOREDV4:%.*]] = zext i65 [[ADD]] 
to i128 +// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: ret void +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_65_add_unsigned +// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6:![0-9]+]] +// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65 +// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65 +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i65 [[B]], [[A]] +// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = zext i65 [[ADD]] to i128 +// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: ret void +// +unsigned _BitInt(65) test_bitint_65_add_unsigned(unsigned _BitInt(65) a, unsigned _BitInt(65) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_65_add_signed +// RISCV64-SAME: (i65 noundef signext [[A:%.*]], i65 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]] +// RISCV64-NEXT: ret i65 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_65_add_signed +// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65 +// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65 +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]] +// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128 +// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: ret void +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_65_add_signed +// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65 +// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65 +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]] +// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128 +// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: ret void +// +signed _BitInt(65) test_bitint_65_add_signed(signed _BitInt(65) a, signed _BitInt(65) b) { + return a + b; +} + +// 
RISCV64-LABEL: define {{[^@]+}}@test_bitint_65_add_default +// RISCV64-SAME: (i65 noundef signext [[A:%.*]], i65 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]] +// RISCV64-NEXT: ret i65 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_65_add_default +// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65 +// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65 +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]] +// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128 +// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]] +// RISCV32-NEXT: ret void +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_65_add_default +// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65 +// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65 +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]] +// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128 +// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]] +// RISCV32_INT128-NEXT: ret void +// +_BitInt(65) test_bitint_65_add_default(_BitInt(65) a, _BitInt(65) b) { + return a + b; +} + + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_77_add_unsigned +// RISCV64-SAME: (i77 noundef zeroext [[A:%.*]], i77 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add i77 [[B]], [[A]] +// RISCV64-NEXT: ret i77 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_77_add_unsigned +// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10:![0-9]+]] +// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77 +// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77 +// RISCV32-NEXT: [[ADD:%.*]] = add i77 [[B]], [[A]] +// RISCV32-NEXT: [[STOREDV4:%.*]] = zext i77 [[ADD]] to i128 +// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: ret void +// +// RISCV32_INT128-LABEL: define 
{{[^@]+}}@test_bitint_77_add_unsigned +// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10:![0-9]+]] +// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77 +// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77 +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i77 [[B]], [[A]] +// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = zext i77 [[ADD]] to i128 +// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: ret void +// +unsigned _BitInt(77) test_bitint_77_add_unsigned(unsigned _BitInt(77) a, unsigned _BitInt(77) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_77_add_signed +// RISCV64-SAME: (i77 noundef signext [[A:%.*]], i77 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]] +// RISCV64-NEXT: ret i77 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_77_add_signed +// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77 +// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77 +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]] +// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128 +// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: ret void +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_77_add_signed +// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77 +// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77 +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]] +// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128 +// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: ret void +// +signed _BitInt(77) test_bitint_77_add_signed(signed _BitInt(77) a, signed _BitInt(77) b) { + return a + b; +} + +// RISCV64-LABEL: define {{[^@]+}}@test_bitint_77_add_default +// RISCV64-SAME: (i77 noundef signext [[A:%.*]], i77 noundef signext [[B:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// RISCV64-NEXT: entry: +// RISCV64-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]] +// RISCV64-NEXT: ret i77 [[ADD]] +// +// RISCV32-LABEL: define {{[^@]+}}@test_bitint_77_add_default +// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32-NEXT: entry: +// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77 +// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77 +// RISCV32-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]] +// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128 +// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]] +// RISCV32-NEXT: ret void +// +// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_77_add_default +// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] { +// RISCV32_INT128-NEXT: entry: +// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77 +// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77 +// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]] +// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128 +// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]] +// RISCV32_INT128-NEXT: ret void +// +_BitInt(77) test_bitint_77_add_default(_BitInt(77) a, _BitInt(77) b) { + return a + b; +} diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 8f3d459..bcffd861 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1100,6 +1100,7 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hadd_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_hadd_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +3.0, +11.0, +7.0, +15.0)); __m256 test_mm256_hadd_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_hadd_ps @@ -1107,17 +1108,27 @@ __m256 test_mm256_hadd_ps(__m256 A, __m256 B) { return _mm256_hadd_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_hadd_ps( + (__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, + (__m256){+9.0f, +10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f}), + +3.0f, +7.0f, +19.0f, +23.0f, +11.0f, +15.0f, +27.0f, +31.0f)); + __m256d test_mm256_hsub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_hsub_pd // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){+1.0, +2.0, +4.0, +3.0}, (__m256d){+10.0, +6.0, +16.0, +8.0}), -1.0,+4.0,+1.0,+8.0)); __m256 test_mm256_hsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: 
test_mm256_hsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_hsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_hsub_ps( + (__m256){1.0f, 2.0f, 4.0f, 3.0f, 5.0f, 7.0f, 7.0f, 5.0f}, + (__m256){6.0f, 9.0f, 11.0f, 8.0f, 13.0f, 17.0f, 15.0f, 11.0f}), + -1.0f, 1.0f, -3.0f, 3.0f, -2.0f, 2.0f, -4.0f, 4.0f)); __m256i test_mm256_insert_epi8(__m256i x, char b) { // CHECK-LABEL: test_mm256_insert_epi8 diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 55f18f9..dc64f96 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -485,36 +485,60 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) { // CHECK: call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hadd_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_hadd_epi16( + (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, + (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}), + 3,7,11,15,35,39,43,47,19,23,27,31,51,55,59,63)); __m256i test_mm256_hadd_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hadd_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_hadd_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32( + (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80}, + (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}), + 30,70,20,60,110,150,100,140)); __m256i test_mm256_hadds_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hadds_epi16 // CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hadds_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi( _mm256_hadds_epi16( + (__m256i)(__v16hi){32767, 32767, 1,2,3,4,5,6,7,8,9,10,11,12,13,14}, + (__m256i)(__v16hi){19,20,21,22,23,24,25,26,27,28,29,30,31,32, 32767, 5}), + 32767, 3,7,11, 39,43,47,51,15,19,23,27, 55,59,63, 32767)); __m256i test_mm256_hsub_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hsub_epi16 // CHECK: call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hsub_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_hsub_epi16( + (__m256i)(__v16hi){2,1,1,2,5,3,3,5,7,4,4,7,9,5,5,9}, + (__m256i)(__v16hi){10,5,5,10,12,6,6,12,21,14,14,21,24,16,16,24}), + 1,-1,2,-2,5,-5,6,-6,3,-3,4,-4, 7,-7,8,-8)); __m256i test_mm256_hsub_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hsub_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_hsub_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_hsub_epi32( + (__m256i)(__v8si){10, 20, 30,50,60,90,100,140}, + (__m256i)(__v8si){200,150,260,200,420,350,800,720}), + -10,-20,50,60, -30,-40, 70,80)); __m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hsubs_epi16 // CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hsubs_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_hsubs_epi16( + (__m256i)(__v16hi){32726, -100, 3, 2, 6, 4, 8, 5,15,10 ,21, 14, 27, 18, 100, 90}, + (__m256i)(__v16hi){40, 20, 100, 70, 200,150, 100,40, 1000,900,300,150, 500,300, 1, 1}), + 32767, 1, 2, 3, 20, 30, 50, 60, 5, 7, 9, 10, 100, 150, 200, 0)); __m128i test_mm_i32gather_epi32(int const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_epi32 diff --git a/clang/test/CodeGen/X86/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c index eebefb0..f90697e 100644 
--- a/clang/test/CodeGen/X86/avx512ifma-builtins.c +++ b/clang/test/CodeGen/X86/avx512ifma-builtins.c @@ -8,45 +8,230 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s - #include <immintrin.h> +#include "builtin_test_helpers.h" __m512i test_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_madd52hi_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - return _mm512_madd52hi_epu64(__X, __Y, __Z); + return _mm512_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64( + (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}), + 100, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}), + 0xFFFFFFFFFFFFEull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64( + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull}), + 4503599627370495ull, 4503599627370496ull, + 4503599627370497ull, 4503599627370498ull, + 4503599627370499ull, 4503599627370500ull, + 4503599627370501ull, 4503599627370502ull)); + __m512i test_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_madd52hi_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y); + return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64( + (__m512i)(__v8du){111, 222, 333, 444, 555, 666, + 777, 888}, + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}), + 111, 222, 333, 444, 555, 666, 777, 888)); + +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64( + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + 0xFF, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}), + 10, 20, 30, 40, 50, 60, 70, 80)); + __m512i test_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_maskz_madd52hi_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z); + return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z); 
} +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64( + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}), + 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64( + 0xFF, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}), + 1, 2, 3, 4, 5, 6, 7, 8)); + __m512i test_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_madd52lo_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - return _mm512_madd52lo_epu64(__X, __Y, __Z); + return _mm512_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}), + 50, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){20, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){30, 0, 0, 0, 0, 0, 0, 0}), + 700, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){1, 0, 0, 0, 0, 0, 0, 0}), + 0xFFFFFFFFFFFFFull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0x1F000000000000ull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){2, 0, 0, 0, 0, 0, 0, 0}), + 0xE000000000000ull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}), + 21, 62, 123, 204, 305, 426, 567, 728)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}), + 4503599627370545ull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}), + 210, 620, 1230, 2040, 3050, 4260, 5670, 7280)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0x1F000000000000ull, + 0x1F000000000000ull, 0, 0, 0, + 0, 0, 0}, + (__m512i)(__v8du){2, 3, 0, 0, 0, 0, 0, 0}), + 0xE000000000000ull, 0xD000000000000ull, 0, 0, 0, 0, + 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull}, + (__m512i)(__v8du){1, 1, 1, 1, 1, 1, 1, 1}), + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull)); + __m512i test_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_madd52lo_epu64 // CHECK: call {{.*}}<8 x i64> 
@llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y); + return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64( + (__m512i)(__v8du){111, 222, 333, 444, 555, 666, + 777, 888}, + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}), + 111, 222, 333, 444, 555, 666, 777, 888)); + +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64( + (__m512i)(__v8du){1000, 2000, 3000, 4000, 5000, + 6000, 7000, 8000}, + 0xFF, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80, + 90}), + 3000, 8000, 15000, 24000, 35000, 48000, 63000, + 80000)); + __m512i test_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_maskz_madd52lo_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z); + return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z); } + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64( + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}), + 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64( + 0xFF, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80, + 90}, + (__m512i)(__v8du){30, 40, 50, 60, 70, 80, 90, + 100}), + 700, 1400, 2300, 3400, 4700, 6200, 7900, 9800)); diff --git a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c index 89108fc..1cbb580 100644 --- a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c +++ b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c @@ -8,85 +8,241 @@ // RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s // RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s - #include <immintrin.h> +#include "builtin_test_helpers.h" __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52hi_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - return _mm_madd52hi_epu64(__X, __Y, __Z); + return _mm_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 100, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})), + 0xFFFFFFFFFFFFEull, 0)); + __m128i test_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_madd52hi_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x 
i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y); + return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){111, 222}), + 0x0, + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20})), + 111, 222)); + +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){10, 20}), + 0x2, + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}), + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL})), + 10, 0x100000000014ULL)); + __m128i test_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_maskz_madd52hi_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z); + return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x3, + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){100, 200})), + 1, 2)); + +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x1, + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}), + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}), + (__m128i)((__v2du){0, 0})), + 0x1000000000000ULL, 0)); + __m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) - return _mm256_madd52hi_epu64(__X, __Y, __Z); + return _mm256_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 6, 7, 8})), + 100, 200, 300, 400)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0})), + 0xFFFFFFFFFFFFEull, 0, 0, 0)); + __m256i test_mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y); + return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){111, 222, 333, 444}), + 0x0, + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40})), + 111, 222, 333, 444)); + +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){10, 20, 30, 40}), + 0xA, + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL}), + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL})), + 10, 0x100000000014ULL, 30, 0x100000000028ULL)); + __m256i test_mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_maskz_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> 
%{{.*}}, <4 x i64> %{{.*}} - return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z); + return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0xF, + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){100, 200, 300, 400})), + 1, 2, 3, 4)); + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0x5, + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL}), + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL}), + (__m256i)((__v4du){0, 0, 0, 0})), + 0x1000000000000ULL, 0, 0x1000000000000ULL, 0)); + __m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - return _mm_madd52lo_epu64(__X, __Y, __Z); + return _mm_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 50, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){20, 0}), + (__m128i)((__v2du){30, 0})), + 700, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){2, 3})), + 21, 62)); + __m128i test_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y); + return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){1000, 2000}), + 0x3, + (__m128i)((__v2du){100, 200}), + (__m128i)((__v2du){20, 30})), + 3000, 8000)); + +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){111, 222}), + 0x0, + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20})), + 111, 222)); + __m128i test_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_maskz_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z); + return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x3, + (__m128i)((__v2du){100, 200}), + (__m128i)((__v2du){20, 30}), + (__m128i)((__v2du){30, 40})), + 700, 1400)); + +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x1, + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){20, 0}), + (__m128i)((__v2du){30, 0})), + 700, 0)); + __m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) - return _mm256_madd52lo_epu64(__X, __Y, __Z); + return _mm256_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + 
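[Editor's note: the expected lanes in these IFMA assertions follow from the usual definition of VPMADD52LUQ/VPMADD52HUQ: the low 52 bits of each 64-bit element of the two multiplicands are multiplied to a 104-bit product, and either the low or the high 52 bits of that product are added (modulo 2^64) to the accumulator element. A minimal per-lane reference model, sketched under the assumption that unsigned __int128 is available; the helper names are illustrative and not part of the test suite.]

#include <stdint.h>

// Illustrative per-lane model of _mm*_madd52lo_epu64 / _mm*_madd52hi_epu64.
static uint64_t madd52lo_lane(uint64_t x, uint64_t y, uint64_t z) {
  const uint64_t mask52 = (1ULL << 52) - 1;
  // 104-bit product of the low 52 bits of y and z.
  unsigned __int128 p = (unsigned __int128)(y & mask52) * (z & mask52);
  return x + (uint64_t)(p & mask52);          // add low 52 bits, mod 2^64
}

static uint64_t madd52hi_lane(uint64_t x, uint64_t y, uint64_t z) {
  const uint64_t mask52 = (1ULL << 52) - 1;
  unsigned __int128 p = (unsigned __int128)(y & mask52) * (z & mask52);
  return x + (uint64_t)((p >> 52) & mask52);  // add bits [103:52], mod 2^64
}

[For example, madd52hi_lane(0, 0xFFFFFFFFFFFFF, 0xFFFFFFFFFFFFF) yields (2^52 - 1)^2 >> 52 = 2^52 - 2 = 0xFFFFFFFFFFFFE, which is exactly the edge case the tests above and below assert.]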
__m256i test_mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y); + return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){1000, 2000, 3000, 4000}), + 0xF, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){20, 30, 40, 50})), + 3000, 8000, 15000, 24000)); + +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){111, 222, 333, 444}), + 0x0, + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40})), + 111, 222, 333, 444)); + +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){11, 22, 33, 44}), + 0x5, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40})), + 1011, 22, 9033, 44)); + __m256i test_mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_maskz_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z); + return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z); } + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0xF, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){20, 30, 40, 50}), + (__m256i)((__v4du){30, 40, 50, 60})), + 700, 1400, 2300, 3400)); + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0x9, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 10, 15, 20})), + 150, 0, 0, 1200)); diff --git a/clang/test/CodeGen/X86/avxifma-builtins.c b/clang/test/CodeGen/X86/avxifma-builtins.c index aa15159..70531da 100644 --- a/clang/test/CodeGen/X86/avxifma-builtins.c +++ b/clang/test/CodeGen/X86/avxifma-builtins.c @@ -8,8 +8,9 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s - #include <immintrin.h> +#include "builtin_test_helpers.h" + __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52hi_epu64 @@ -17,44 +18,207 @@ __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { return _mm_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){50, 100}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){5, 6})), + 50, 100)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})), + 0xFFFFFFFFFFFFEull, 0)); + __m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( 
+ (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 6, 7, 8})), + 100, 200, 300, 400)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0})), + 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0)); + __m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 50, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){2, 3})), + 21, 62)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){1, 0})), + 0xFFFFFFFFFFFFFull, 0)); + __m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){1, 0, 0, 0})), + 0xFFFFFFFFFFFFFull, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0x1F000000000000ull, 0, 0, + 0}), + (__m256i)((__v4du){2, 0, 0, 0})), + 0xE000000000000ull, 0, 0, 0)); + __m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { -// CHECK-LABEL: test_mm_madd52hi_avx_epu64 -// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK-LABEL: test_mm_madd52hi_avx_epu64 + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_madd52hi_avx_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64( + (__m128i)((__v2du){50, 100}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){5, 6})), + 50, 100)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 100, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})), + 0xFFFFFFFFFFFFEull, 0)); + __m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { -// CHECK-LABEL: test_mm256_madd52hi_avx_epu64 -// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK-LABEL: test_mm256_madd52hi_avx_epu64 + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52hi_avx_epu64(__X, __Y, __Z); } 
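[Editor's note: the TEST_CONSTEXPR assertions here and in the other builtin tests touched by this patch depend on helpers from builtin_test_helpers.h and on the intrinsics being evaluable in constant expressions, which is why the RUN lines also exercise -fexperimental-new-constant-interpreter. A plausible shape of those helpers, sketched only for orientation — this is an assumption, and the real header may differ in detail.]

// Assumed shape -- not the verbatim contents of builtin_test_helpers.h.
#if defined(__cplusplus) && __cplusplus >= 201103L
#define TEST_CONSTEXPR(...) static_assert((__VA_ARGS__), "")
#else
#define TEST_CONSTEXPR(...)  // constant evaluation is only asserted in C++
#endif

#ifdef __cplusplus
constexpr bool match_v4di(__m256i v, long long a, long long b,
                          long long c, long long d) {
  // Vector subscripting on __m256i yields the long long lanes.
  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}
#endif

[Under this shape, each assertion fails at compile time whenever constant folding of the intrinsic diverges from the runtime semantics the FileCheck lines verify.]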
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0})), + 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64( + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 6, 7, 8})), + 100, 200, 300, 400)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0})), + 0xFFFFFFFFFFFFEull, 0, 0, 0)); + __m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { -// CHECK-LABEL: test_mm_madd52lo_avx_epu64 -// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK-LABEL: test_mm_madd52lo_avx_epu64 + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_madd52lo_avx_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 50, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){20, 0}), + (__m128i)((__v2du){30, 0})), + 700, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){2, 3})), + 21, 62)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){1, 0})), + 0xFFFFFFFFFFFFFull, 0)); + __m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { -// CHECK-LABEL: test_mm256_madd52lo_avx_epu64 -// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK-LABEL: test_mm256_madd52lo_avx_epu64 + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52lo_avx_epu64(__X, __Y, __Z); } + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + + + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){1, 0, 0, 0})), + 0xFFFFFFFFFFFFFull, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0x1F000000000000ull, 0, 0, + 0}), + (__m256i)((__v4du){2, 0, 0, 0})), + 0xE000000000000ull, 0, 0, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){5, 10}), + (__m128i)((__v2du){100, 200}), + (__m128i)((__v2du){7, 8})), + 705, 1610)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 2b45b92..a1e05a1 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -312,36 +312,42 @@ __m64 test_mm_hadd_pi16(__m64 a, __m64 b) { // CHECK: 
call <8 x i16> @llvm.x86.ssse3.phadd.w.128( return _mm_hadd_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hadd_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),3,7,11,15)); __m64 test_mm_hadd_pi32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hadd_pi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128( return _mm_hadd_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_hadd_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),3,7)); __m64 test_mm_hadds_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hadds_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128( return _mm_hadds_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 1,3},(__m64)(__v4hi){-1,3, 40, 60}),32767, 4, 2,100)); __m64 test_mm_hsub_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hsub_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128( return _mm_hsub_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,4,3},(__m64)(__v4hi){10,5,0,-10}),-1,1,5,10)); __m64 test_mm_hsub_pi32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hsub_pi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128( return _mm_hsub_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){4,3}),-1,1)); __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hsubs_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128( return _mm_hsubs_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, 32767, 5, -32767},(__m64)(__v4hi){4,5,10,5}),0,32767,-1,5)); __m64 test_mm_insert_pi16(__m64 a, int d) { // CHECK-LABEL: test_mm_insert_pi16 diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index c53afc5..a82dd40 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -31,24 +31,28 @@ __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_hadd_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_hadd_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +3.0, +7.0)); __m128 test_mm_hadd_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_hadd_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_hadd_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_hadd_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f,+6.0f,+7.0f,+8.0f}), +3.0f, +7.0f, +11.0f, +15.0f)); __m128d test_mm_hsub_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hsub_pd // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_hsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_hsub_pd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +3.0}), -1.0, +1.0)); __m128 test_mm_hsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_hsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_hsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_hsub_ps((__m128){+1.0f, +2.0f, +4.0f, +3.0f}, (__m128){+5.0f,+7.0f,+10.0f,+8.0f}), -1.0f, +1.0f, -2.0f, +2.0f)); __m128i test_mm_lddqu_si128(__m128i const* P) { // CHECK-LABEL: test_mm_lddqu_si128 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index 5885768..e623599 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -60,36 +60,43 @@ __m128i test_mm_hadd_epi16(__m128i a, __m128i b) { // CHECK: call <8 x i16> 
@llvm.x86.ssse3.phadd.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hadd_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hadd_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){17,18,19,20,21,22,23,24}), 3,7,11,15,35,39,43,47)); __m128i test_mm_hadd_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hadd_epi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_hadd_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_hadd_epi32((__m128i)(__v4si){1,2,3,4}, (__m128i)(__v4si){5,6,7,8}), 3,7,11,15)); __m128i test_mm_hadds_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hadds_epi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hadds_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,-1,2,-3,3,1,4}, (__m128i)(__v8hi){2,6,1,9,-4,16,7,8}), 32767, 1,0,5,8,10,12,15)); + __m128i test_mm_hsub_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hsub_epi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hsub_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){20,15,16,12,9,6,4,2}, (__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 5,4,3,2,1,0,-1,-2)); __m128i test_mm_hsub_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hsub_epi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_hsub_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,1,1}, (__m128i)(__v4si){7,5,10,5}), 1,0,2,5)); __m128i test_mm_hsubs_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hsubs_epi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hsubs_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, -15,16,12,9,6,4,2},(__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 32767,4,3,2,1,0,-1,-2)); __m128i test_mm_maddubs_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_maddubs_epi16 diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c index 7cfd992..f845afc 100644 --- a/clang/test/CodeGen/ext-int-cc.c +++ b/clang/test/CodeGen/ext-int-cc.c @@ -49,8 +49,8 @@ void ParamPassing(_BitInt(128) b, _BitInt(64) c) {} // R600: define{{.*}} void @ParamPassing(ptr addrspace(5) byval(i128) align 8 %{{.+}}, i64 %{{.+}}) // ARC: define{{.*}} void @ParamPassing(ptr byval(i128) align 4 %{{.+}}, i64 inreg %{{.+}}) // XCORE: define{{.*}} void @ParamPassing(ptr byval(i128) align 4 %{{.+}}, i64 %{{.+}}) -// RISCV64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) -// RISCV32: define{{.*}} void @ParamPassing(ptr dead_on_return %{{.+}}, i64 %{{.+}}) +// RISCV64: define{{.*}} void @ParamPassing(i128 signext %{{.+}}, i64 signext %{{.+}}) +// RISCV32: define{{.*}} void @ParamPassing(ptr dead_on_return %{{.+}}, i64 signext %{{.+}}) // WASM: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) // SYSTEMZ: define{{.*}} void @ParamPassing(ptr dead_on_return %{{.+}}, i64 %{{.+}}) // PPC64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) @@ -79,8 +79,8 @@ void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {} // R600: define{{.*}} void @ParamPassing2(ptr addrspace(5) byval(i128) align 8 %{{.+}}, i63 %{{.+}}) // ARC: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 inreg %{{.+}}) // XCORE: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 %{{.+}}) -// RISCV64: define{{.*}} void 
@ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}}) -// RISCV32: define{{.*}} void @ParamPassing2(ptr dead_on_return %{{.+}}, i63 %{{.+}}) +// RISCV64: define{{.*}} void @ParamPassing2(i127 signext %{{.+}}, i63 signext %{{.+}}) +// RISCV32: define{{.*}} void @ParamPassing2(ptr dead_on_return %{{.+}}, i63 signext %{{.+}}) // WASM: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}}) // SYSTEMZ: define{{.*}} void @ParamPassing2(ptr dead_on_return %{{.+}}, i63 signext %{{.+}}) // PPC64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}}) diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c new file mode 100644 index 0000000..041797b --- /dev/null +++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c @@ -0,0 +1,57 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -O2 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: define dso_local signext range(i32 0, 4) i32 @test( +// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]] +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4 +// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]]) +// CHECK-NEXT: ret i32 [[ASMRESULT1]] +// +int test(int x) { + int cc; + asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc)); + return cc; +} + +// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_low_high_transformation( +// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]] +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4 +// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[ASMRESULT1]], -1 +// CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 2 +// CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[LOR_EXT]] +// +int test_low_high_transformation(int x) { + int cc; + asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc)); + return cc == 1 || cc == 2; +} + +// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_equal_high_transformation( +// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]] +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4 +// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ASMRESULT1]], 1 +// CHECK-NEXT: [[LOR_EXT:%.*]] = xor i32 [[TMP2]], 1 +// CHECK-NEXT: ret i32 [[LOR_EXT]] +// +int test_equal_high_transformation(int x) { + int cc; + asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc)); + return cc == 0 || cc == 2; +} +//. +// CHECK: [[META2]] = !{i64 788} +// CHECK: [[META3]] = !{i64 1670} +// CHECK: [[META4]] = !{i64 2505} +//. 
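Aside (editorial note, not part of the patch): the TEST_CONSTEXPR cases added to the SSE3/SSSE3 builtin tests above all encode the same pairwise pattern: the low half of each result holds the horizontal sums or differences of the first operand, the high half those of the second. The following is a minimal scalar model showing how the expected vectors are derived; it assumes C++20 (for constexpr std::array comparison), and hadd_epi16 here is an illustrative stand-in, not the clang lowering of the intrinsic.

#include <array>
#include <cstdint>

// Scalar model of SSSE3 phaddw: r[0..3] are the pairwise sums of a,
// r[4..7] the pairwise sums of b, with ordinary wrapping arithmetic.
constexpr std::array<std::int16_t, 8> hadd_epi16(std::array<std::int16_t, 8> a,
                                                 std::array<std::int16_t, 8> b) {
  std::array<std::int16_t, 8> r{};
  for (int i = 0; i < 4; ++i) {
    r[i] = static_cast<std::int16_t>(a[2 * i] + a[2 * i + 1]);
    r[i + 4] = static_cast<std::int16_t>(b[2 * i] + b[2 * i + 1]);
  }
  return r;
}

// Mirrors the new TEST_CONSTEXPR expectation for _mm_hadd_epi16.
static_assert(hadd_epi16({1, 2, 3, 4, 5, 6, 7, 8},
                         {17, 18, 19, 20, 21, 22, 23, 24}) ==
                  std::array<std::int16_t, 8>{3, 7, 11, 15, 35, 39, 43, 47},
              "pairwise horizontal add");

The saturating variants (_mm_hadds_*, _mm_hsubs_*) differ only in clamping each pairwise result to [-32768, 32767], which is why the 32767+32767 case in the tests above expects 32767 rather than a wrapped value.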
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index ecddf13..44089c4 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2525,10 +2525,10 @@ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_KL_M32 // RUN: %clang -march=pantherlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_NKL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_CWF_M32,CHECK_NKL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_CWF_M32,CHECK_NKL_M32 // CHECK_ARL_M32: #define __ADX__ 1 // CHECK_ARL_M32: #define __AES__ 1 // CHECK_ARL_M32: #define __AVX2__ 1 @@ -2568,7 +2568,7 @@ // CHECK_ARL_M32: #define __POPCNT__ 1 // CHECK_ARL_M32-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M32-NOT: #define __PREFETCHI__ 1 -// CHECK_PTL_M32: #define __PREFETCHI__ 1 +// CHECK_CWF_M32: #define __PREFETCHI__ 1 // CHECK_ARL_M32: #define __PRFCHW__ 1 // CHECK_ARL_M32: #define __PTWRITE__ 1 // CHECK_ARL_M32-NOT: #define __RAOINT__ 1 @@ -2595,7 +2595,6 @@ // CHECK_ARL_M32: #define __UINTR__ 1 // CHECK_ARL_M32-NOT: #define __USERMSR__ 1 // CHECK_ARLS_M32-NOT: #define __USERMSR__ 1 -// CHECK_PTL_M32-NOT: #define __USERMSR__ 1 // CHECK_CWF_M32: #define __USERMSR__ 1 // CHECK_ARL_M32: #define __VAES__ 1 // CHECK_ARL_M32: #define __VPCLMULQDQ__ 1 @@ -2630,10 +2629,10 @@ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_KL_M64 // RUN: %clang -march=pantherlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_NKL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_CWF_M64,CHECK_NKL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_CWF_M64,CHECK_NKL_M64 // CHECK_ARL_M64: #define __ADX__ 1 // CHECK_ARL_M64: #define __AES__ 1 // CHECK_ARL_M64: #define __AVX2__ 1 @@ -2673,7 +2672,7 @@ // CHECK_ARL_M64: #define __POPCNT__ 1 // CHECK_ARL_M64-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M64-NOT: #define __PREFETCHI__ 1 -// CHECK_PTL_M64: #define __PREFETCHI__ 1 +// CHECK_CWF_M64: #define __PREFETCHI__ 1 // CHECK_ARL_M64: #define __PRFCHW__ 1 // CHECK_ARL_M64: #define __PTWRITE__ 1 // CHECK_ARL_M64-NOT: #define __RAOINT__ 1 @@ -2701,7 +2700,6 @@ // CHECK_ARL_M64: #define __UINTR__ 1 // CHECK_ARL_M64-NOT: #define __USERMSR__ 1 // CHECK_ARLS_M64-NOT: #define __USERMSR__ 1 -// CHECK_PTL_M64-NOT: #define __USERMSR__ 1 // CHECK_CWF_M64: #define __USERMSR__ 1 // CHECK_ARL_M64: #define __VAES__ 1 // CHECK_ARL_M64: #define __VPCLMULQDQ__ 1 diff --git a/clang/test/Preprocessor/systemz_asm_flag_output.c b/clang/test/Preprocessor/systemz_asm_flag_output.c new file mode 100644 
index 0000000..b627499 --- /dev/null +++ b/clang/test/Preprocessor/systemz_asm_flag_output.c @@ -0,0 +1,4 @@ +// RUN: %clang -target systemz-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s +// RUN: %clang -target s390x-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s + +// CHECK: #define __GCC_ASM_FLAG_OUTPUTS__ 1 diff --git a/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp b/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp index 6cf0e02..331fe83 100644 --- a/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp +++ b/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp @@ -626,3 +626,20 @@ void fn() { } } + + +namespace GH109096 { +consteval void undefined(); +template <typename T> +struct scope_exit { + T t; + constexpr ~scope_exit() { t(); } + // expected-error@-1 {{call to immediate function 'GH109096::(anonymous class)::operator()' is not a constant expression}} \ + // expected-note@-1 {{implicit use of 'this' pointer is only allowed within the evaluation}} +}; + +scope_exit guard( // expected-note {{in instantiation of member function}} + []() { undefined(); } +); + +} diff --git a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl index 3f0a37d..999372c 100644 --- a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl @@ -20,7 +20,7 @@ Buffer<double2> r4; // expected-error@+4 {{constraints not satisfied for class template 'Buffer'}} // expected-note@*:* {{template declaration from hidden source: template <typename element_type> requires __is_typed_resource_element_compatible<element_type> class Buffer}} // expected-note@*:* {{because 'Buffer<int>' does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(Buffer<int>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(hlsl::Buffer<int>)' evaluated to false}} Buffer<Buffer<int> > r5; struct s { @@ -66,7 +66,7 @@ Buffer<half[4]> r10; typedef vector<int, 8> int8; // expected-error@+3 {{constraints not satisfied for class template 'Buffer'}} // expected-note@*:* {{because 'int8' (aka 'vector<int, 8>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(int8)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<int, 8>)' evaluated to false}} Buffer<int8> r11; typedef int MyInt; @@ -91,7 +91,7 @@ Buffer<numbers> r15; // expected-error@+3 {{constraints not satisfied for class template 'Buffer'}} // expected-note@*:* {{because 'double3' (aka 'vector<double, 3>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(double3)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<double, 3>)' evaluated to false}} Buffer<double3> r16; diff --git a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl index aa36c48..b33f2af 100644 --- a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl @@ -20,7 +20,7 @@ RWBuffer<double2> r4; // expected-error@+4 {{constraints not satisfied for class template 'RWBuffer'}} // expected-note@*:* {{template declaration from hidden source: template <typename element_type> requires 
__is_typed_resource_element_compatible<element_type> class RWBuffer}} // expected-note@*:* {{because 'RWBuffer<int>' does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(RWBuffer<int>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(hlsl::RWBuffer<int>)' evaluated to false}} RWBuffer<RWBuffer<int> > r5; struct s { @@ -66,7 +66,7 @@ RWBuffer<half[4]> r10; typedef vector<int, 8> int8; // expected-error@+3 {{constraints not satisfied for class template 'RWBuffer'}} // expected-note@*:* {{because 'int8' (aka 'vector<int, 8>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(int8)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<int, 8>)' evaluated to false}} RWBuffer<int8> r11; typedef int MyInt; @@ -91,7 +91,7 @@ RWBuffer<numbers> r15; // expected-error@+3 {{constraints not satisfied for class template 'RWBuffer'}} // expected-note@*:* {{because 'double3' (aka 'vector<double, 3>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(double3)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<double, 3>)' evaluated to false}} RWBuffer<double3> r16; diff --git a/clang/test/SemaTemplate/concepts-recursive-inst.cpp b/clang/test/SemaTemplate/concepts-recursive-inst.cpp index d36c6a8..73dce93 100644 --- a/clang/test/SemaTemplate/concepts-recursive-inst.cpp +++ b/clang/test/SemaTemplate/concepts-recursive-inst.cpp @@ -68,8 +68,8 @@ struct my_range{ void baz() { auto it = begin(rng); // #BEGIN_CALL // expected-error-re@#INF_REQ {{satisfaction of constraint {{.*}} depends on itself}} -// expected-note@#INF_BEGIN {{while checking the satisfaction of concept 'Inf<struct my_range>' requested here}} -// expected-note@#INF_BEGIN_EXPR {{while checking constraint satisfaction for template 'begin<struct my_range>' required here}} +// expected-note@#INF_BEGIN {{while checking the satisfaction of concept 'Inf<DirectRecursiveCheck::my_range>' requested here}} +// expected-note@#INF_BEGIN_EXPR {{while checking constraint satisfaction for template 'begin<DirectRecursiveCheck::my_range>' required here}} // expected-note@#INF_BEGIN_EXPR {{while substituting deduced template arguments into function template 'begin'}} // expected-note@#INF_BEGIN_EXPR {{in instantiation of requirement here}} // expected-note@#INF_REQ {{while substituting template arguments into constraint expression here}} diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index ee2bb8d..aaa20f6 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -833,13 +833,13 @@ struct Parent { static_assert(Parent<void>::TakesUnary<int, 0>::i == 0); // expected-error@+3{{constraints not satisfied for class template 'TakesUnary'}} // expected-note@#UNARY{{because 'decltype(0ULL)' (aka 'unsigned long long') does not satisfy 'C'}} -// expected-note@#61777_C{{because 'sizeof(decltype(0ULL)) == 4' (8 == 4) evaluated to false}} +// expected-note@#61777_C{{because 'sizeof(unsigned long long) == 4' (8 == 4) evaluated to false}} static_assert(Parent<void>::TakesUnary<int, 0uLL>::i == 0); static_assert(Parent<int>::TakesBinary<int, 
0>::i == 0); // expected-error@+3{{constraints not satisfied for class template 'TakesBinary'}} // expected-note@#BINARY{{because 'C2<decltype(0ULL), int>' evaluated to false}} -// expected-note@#61777_C2{{because 'sizeof(decltype(0ULL)) == sizeof(int)' (8 == 4) evaluated to false}} +// expected-note@#61777_C2{{because 'sizeof(unsigned long long) == sizeof(int)' (8 == 4) evaluated to false}} static_assert(Parent<int>::TakesBinary<int, 0ULL>::i == 0); } @@ -1329,8 +1329,8 @@ static_assert(__cpp17_iterator<not_move_constructible>); \ // expected-error {{static assertion failed}} \ // expected-note {{because 'not_move_constructible' does not satisfy '__cpp17_iterator'}} \ // expected-note@#__cpp17_copy_constructible {{because 'not_move_constructible' does not satisfy '__cpp17_copy_constructible'}} \ -// expected-note@#__cpp17_move_constructible {{because 'not_move_constructible' does not satisfy '__cpp17_move_constructible'}} \ -// expected-note@#is_move_constructible_v {{because 'is_move_constructible_v<not_move_constructible>' evaluated to false}} +// expected-note@#__cpp17_move_constructible {{because 'parameter_mapping_regressions::case3::not_move_constructible' does not satisfy '__cpp17_move_constructible'}} \ +// expected-note@#is_move_constructible_v {{because 'is_move_constructible_v<parameter_mapping_regressions::case3::not_move_constructible>' evaluated to false}} } namespace case4 { @@ -1416,6 +1416,31 @@ concept IsEntitySpec = } +namespace case8 { + +template <class T> +struct type_identity { + using type = T; +}; + +template <typename Inner> +struct Cat {}; + +template <typename T> +concept CatConcept = requires { + []<class Inner>(type_identity<Cat<Inner>>) {}(type_identity<T>{}); +}; + +template <typename Dummy> +struct Feeder { + template <CatConcept Dummy2> + void feed() noexcept {} +}; + +void main() { Feeder<int>{}.feed<Cat<int>>(); } + +} + } namespace GH162125 { diff --git a/compiler-rt/test/tsan/Darwin/write-interpose.c b/compiler-rt/test/tsan/Darwin/write-interpose.c index cbd9a08..51ff3ee 100644 --- a/compiler-rt/test/tsan/Darwin/write-interpose.c +++ b/compiler-rt/test/tsan/Darwin/write-interpose.c @@ -7,6 +7,8 @@ // Note that running the below command without `lock_during_write` should // deadlock (self-lock) // RUN: env DYLD_INSERT_LIBRARIES=%t.dylib TSAN_OPTIONS=verbosity=2:lock_during_write=disable_for_current_process %run %t 2>&1 | FileCheck %s +// +// UNSUPPORTED: ios #include <stdio.h> diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 1085393..408f039 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -57,8 +57,11 @@ struct OpenACCMappableModel mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, - mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, + bool &needsDestroy) const; + + bool generatePrivateDestroy(mlir::Type type, mlir::OpBuilder &builder, + mlir::Location loc, mlir::Value privatized) const; }; } // namespace fir::acc diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 0b31cfe..bbb7e6e 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -200,6 +200,12 @@ std::optional<llvm::ArrayRef<int64_t>>
getComponentLowerBoundsIfNonDefault( fir::RecordType recordType, llvm::StringRef component, mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr); +/// Indicate if a derived type has a final routine. Returns std::nullopt if +/// that information is not in the IR. +std::optional<bool> +isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable = nullptr); + /// Generate a LLVM constant value of type `ity`, using the provided offset. mlir::LLVM::ConstantOp genConstantIndex(mlir::Location loc, mlir::Type ity, diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 62e5c0c..cfb1891 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -978,15 +978,40 @@ static RecipeOp genRecipeOp( auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty); assert(mappableTy && "Expected that all variable types are considered mappable"); + bool needsDestroy = false; auto retVal = mappableTy.generatePrivateInit( builder, loc, mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>( initBlock->getArgument(0)), initName, initBlock->getArguments().take_back(initBlock->getArguments().size() - 1), - initValue); + initValue, needsDestroy); mlir::acc::YieldOp::create(builder, loc, retVal ? retVal : initBlock->getArgument(0)); + // Create destroy region and generate destruction if requested. + if (needsDestroy) { + llvm::SmallVector<mlir::Type> destroyArgsTy; + llvm::SmallVector<mlir::Location> destroyArgsLoc; + // original and privatized/reduction value + destroyArgsTy.push_back(ty); + destroyArgsTy.push_back(ty); + destroyArgsLoc.push_back(loc); + destroyArgsLoc.push_back(loc); + // Append bounds arguments (if any) in the same order as the init region + if (argsTy.size() > 1) { + destroyArgsTy.append(argsTy.begin() + 1, argsTy.end()); + destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc); + } + + builder.createBlock(&recipe.getDestroyRegion(), + recipe.getDestroyRegion().end(), destroyArgsTy, + destroyArgsLoc); + builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back()); + // Call interface on the privatized/reduction value (2nd argument).
+ (void)mappableTy.generatePrivateDestroy( + builder, loc, recipe.getDestroyRegion().front().getArgument(1)); + mlir::acc::TerminatorOp::create(builder, loc); + } return recipe; } diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 89aa010..41383fb 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -21,6 +21,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/Support/Utils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/IR/BuiltinOps.h" @@ -548,14 +549,27 @@ template <typename Ty> mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const { + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const { + needsDestroy = false; mlir::Value retVal; mlir::Type unwrappedTy = fir::unwrapRefType(type); mlir::ModuleOp mod = builder.getInsertionBlock() ->getParent() ->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder firBuilder(builder, mod); + if (auto recType = llvm::dyn_cast<fir::RecordType>( + fir::getFortranElementType(unwrappedTy))) { + // Need to make deep copies of allocatable components. + if (fir::isRecordWithAllocatableMember(recType)) + TODO(loc, + "OpenACC: privatizing derived type with allocatable components"); + // Need to decide if user assignment/final routine should be called. + if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false)) + TODO(loc, "OpenACC: privatizing derived type with user assignment or " + "final routine "); + } + + fir::FirOpBuilder firBuilder(builder, mod); auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp { auto alloca = fir::AllocaOp::create(firBuilder, loc, ty); return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName); @@ -615,9 +629,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Value firClass = fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar); fir::StoreOp::create(builder, loc, firClass, retVal); + needsDestroy = true; } else if (mlir::isa<fir::SequenceType>(innerTy)) { hlfir::Entity source = hlfir::Entity{var}; - auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source); + auto [temp, cleanupFlag] = + hlfir::createTempFromMold(loc, firBuilder, source); if (fir::isa_ref_type(type)) { // When the temp is created - it is not a reference - thus we can // end up with a type inconsistency. Therefore ensure storage is created @@ -636,6 +652,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( } else { retVal = temp; } + // If heap was allocated, a destroy is required later. 
+ if (cleanupFlag) + needsDestroy = true; } else { TODO(loc, "Unsupported boxed type for OpenACC private-like recipe"); } @@ -667,23 +686,61 @@ template mlir::Value OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::PointerType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; + +template <typename Ty> +bool OpenACCMappableModel<Ty>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const { + mlir::Type unwrappedTy = fir::unwrapRefType(type); + // For boxed scalars allocated with AllocMem during init, free the heap. + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) { + mlir::Value boxVal = privatized; + if (fir::isa_ref_type(boxVal.getType())) + boxVal = fir::LoadOp::create(builder, loc, boxVal); + mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal); + // FreeMem only accepts fir.heap and this may not be represented in the box + // type if the privatized entity is not an allocatable. + mlir::Type heapType = + fir::HeapType::get(fir::unwrapRefType(addr.getType())); + if (heapType != addr.getType()) + addr = fir::ConvertOp::create(builder, loc, heapType, addr); + fir::FreeMemOp::create(builder, loc, addr); + return true; + } + + // Nothing to do for other categories by default, they are stack allocated. 
+ return true; +} + +template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; } // namespace fir::acc diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp index c71642c..92390e4a 100644 --- a/flang/lib/Optimizer/Support/Utils.cpp +++ b/flang/lib/Optimizer/Support/Utils.cpp @@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault( return std::nullopt; } +std::optional<bool> +fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable) { + fir::TypeInfoOp typeInfo = + fir::lookupTypeInfoOp(recordType, module, symbolTable); + if (!typeInfo) + return std::nullopt; + return !typeInfo.getNoFinal(); +} + mlir::LLVM::ConstantOp fir::genConstantIndex(mlir::Location loc, mlir::Type ity, mlir::ConversionPatternRewriter &rewriter, diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 index 429f207..3987f9f 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 @@ -4,6 +4,11 @@ ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefix=FIR-CHECK +! TODO: This test hits a fatal TODO. Deal with allocatable component +! destructions. For arrays, allocatable component allocation may also be +! missing. +! XFAIL: * + module m_firstprivate_derived_alloc_comp type point real, allocatable :: x(:) diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index d37eb8d..485825d 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -26,6 +26,12 @@ ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x2xi32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box<!fir.array<?x?x2xi32>>, !fir.box<!fir.array<?x?x2xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -47,6 +53,12 @@ ! 
CHECK: %[[RIGHT:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[LEFT]] to %[[RIGHT]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -64,6 +76,12 @@ ! CHECK: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[DES_V1]] to %[[DES_V2]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_UxUx2xi32 : !fir.box<!fir.array<?x?x2xi32>> init { @@ -74,6 +92,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?x?x2xi32>, %[[DIM0]]#1, %[[DIM1]]#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.heap<!fir.array<?x?x2xi32>>) ! CHECK: acc.yield %[[DECL]]#0 : !fir.box<!fir.array<?x?x2xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> init { @@ -89,6 +113,13 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! 
CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: @privatization_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> init { @@ -99,6 +130,12 @@ ! CHECK: %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>> ! CHECK: fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> ! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<i32> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init { @@ -114,6 +151,12 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -124,6 +167,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>) ! CHECK: acc.yield %[[DECLARE:.*]]#0 : !fir.box<!fir.array<?xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb50.ub99_ref_50xf32 : !fir.ref<!fir.array<50xf32>> init { @@ -140,6 +189,7 @@ ! CHECK: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref<!fir.array<50xf32>>, !fir.ref<!fir.array<50xf32>> +! CHECK: acc.terminator ! CHECK: } ! 
CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_100xf32 : !fir.ref<!fir.array<100xf32>> init { diff --git a/libcxx/include/__cxx03/__atomic/atomic.h b/libcxx/include/__cxx03/__atomic/atomic.h index bc4a393..f2e8493 100644 --- a/libcxx/include/__cxx03/__atomic/atomic.h +++ b/libcxx/include/__cxx03/__atomic/atomic.h @@ -34,9 +34,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> struct atomic : public __atomic_base<_Tp> { - using __base = __atomic_base<_Tp>; - using value_type = _Tp; - using difference_type = value_type; + using __base = __atomic_base<_Tp>; _LIBCPP_HIDE_FROM_ABI atomic() _NOEXCEPT = default; @@ -59,8 +57,8 @@ struct atomic : public __atomic_base<_Tp> { template <class _Tp> struct atomic<_Tp*> : public __atomic_base<_Tp*> { - using __base = __atomic_base<_Tp*>; - using value_type = _Tp*; + using __base = __atomic_base<_Tp*>; + using difference_type = ptrdiff_t; _LIBCPP_HIDE_FROM_ABI atomic() _NOEXCEPT = default; diff --git a/libcxx/include/__cxx03/__atomic/atomic_base.h b/libcxx/include/__cxx03/__atomic/atomic_base.h index a2b40c6..d79ef7d 100644 --- a/libcxx/include/__cxx03/__atomic/atomic_base.h +++ b/libcxx/include/__cxx03/__atomic/atomic_base.h @@ -32,6 +32,8 @@ struct __atomic_base // false { mutable __cxx_atomic_impl<_Tp> __a_; + using value_type = _Tp; + _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT { return __cxx_atomic_is_lock_free(sizeof(__cxx_atomic_impl<_Tp>)); } @@ -127,6 +129,8 @@ template <class _Tp> struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> { using __base = __atomic_base<_Tp, false>; + using difference_type = typename __base::value_type; + _LIBCPP_HIDE_FROM_ABI __atomic_base() _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {} diff --git a/libcxx/include/__cxx03/regex b/libcxx/include/__cxx03/regex index b96d59d..b6a78f2 100644 --- a/libcxx/include/__cxx03/regex +++ b/libcxx/include/__cxx03/regex @@ -2100,7 +2100,7 @@ public: __ranges_.push_back( std::make_pair(__traits_.transform(__b.begin(), __b.end()), __traits_.transform(__e.begin(), __e.end()))); } else { - if (__b.size() != 1 || __e.size() != 1) + if (__b.size() != 1 || __e.size() != 1 || char_traits<typename string_type::value_type>::lt(__e[0], __b[0])) __throw_regex_error<regex_constants::error_range>(); if (__icase_) { __b[0] = __traits_.translate_nocase(__b[0]); @@ -3911,7 +3911,7 @@ _ForwardIterator basic_regex<_CharT, _Traits>::__parse_character_escape( ++__first; break; default: - if (*__first != '_' && !__traits_.isctype(*__first, ctype_base::alnum)) { + if (!__traits_.isctype(*__first, ctype_base::alnum)) { if (__str) *__str = *__first; else diff --git a/libcxx/include/__cxx03/sstream b/libcxx/include/__cxx03/sstream index 44c2423..741158a 100644 --- a/libcxx/include/__cxx03/sstream +++ b/libcxx/include/__cxx03/sstream @@ -354,9 +354,15 @@ private: public: // [stringbuf.cons] constructors: - _LIBCPP_HIDE_FROM_ABI basic_stringbuf() : __hm_(nullptr), __mode_(ios_base::in | ios_base::out) {} + _LIBCPP_HIDE_FROM_ABI basic_stringbuf() : __hm_(nullptr), __mode_(ios_base::in | ios_base::out) { + // it is implementation-defined whether we initialize eback() & friends to nullptr, and libc++ doesn't + __init_buf_ptrs(); + } - _LIBCPP_HIDE_FROM_ABI explicit basic_stringbuf(ios_base::openmode __wch) : __hm_(nullptr), __mode_(__wch) {} + _LIBCPP_HIDE_FROM_ABI explicit basic_stringbuf(ios_base::openmode __wch) : __hm_(nullptr), __mode_(__wch) { + // it is implementation-defined whether we initialize eback() 
& friends to nullptr, and libc++ doesn't + __init_buf_ptrs(); + } _LIBCPP_HIDE_FROM_ABI explicit basic_stringbuf(const string_type& __s, ios_base::openmode __wch = ios_base::in | ios_base::out) diff --git a/libcxx/include/__cxx03/vector b/libcxx/include/__cxx03/vector index 4b62e0b..dbaa33c 100644 --- a/libcxx/include/__cxx03/vector +++ b/libcxx/include/__cxx03/vector @@ -432,10 +432,12 @@ public: template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI vector(size_type __n, const value_type& __x, const allocator_type& __a) : __end_cap_(nullptr, __a) { + auto __guard = std::__make_exception_guard(__destroy_vector(*this)); if (__n > 0) { __vallocate(__n); __construct_at_end(__n, __x); } + __guard.__complete(); } template <class _InputIterator, @@ -1054,9 +1056,7 @@ inline _LIBCPP_HIDE_FROM_ABI vector<_Tp, _Allocator>::vector(vector&& __x, const __x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr; } else { typedef move_iterator<iterator> _Ip; - auto __guard = std::__make_exception_guard(__destroy_vector(*this)); - assign(_Ip(__x.begin()), _Ip(__x.end())); - __guard.__complete(); + __init_with_size(_Ip(__x.begin()), _Ip(__x.end()), __x.size()); } } diff --git a/libcxx/test/benchmarks/bitset.bench.cpp b/libcxx/test/benchmarks/bitset.bench.cpp index 8fcf52e..b4c7e6f 100644 --- a/libcxx/test/benchmarks/bitset.bench.cpp +++ b/libcxx/test/benchmarks/bitset.bench.cpp @@ -103,7 +103,7 @@ BENCHMARK(BM_BitsetToString<262144>)->Arg(50)->Name("BM_BitsetToString<262144>/U BENCHMARK(BM_BitsetToString<524288>)->Arg(50)->Name("BM_BitsetToString<524288>/Uniform (50%)"); BENCHMARK(BM_BitsetToString<1048576>)->Arg(50)->Name("BM_BitsetToString<1048576>/Uniform (50%)"); // 1 << 20 -static void BM_ctor_ull(benchmark::State& state) { +static void BM_Bitset_ctor_ull(benchmark::State& state) { unsigned long long val = (1ULL << state.range(0)) - 1; for (auto _ : state) { std::bitset<128> b(val); @@ -111,6 +111,6 @@ static void BM_ctor_ull(benchmark::State& state) { } } -BENCHMARK(BM_ctor_ull)->DenseRange(1, 63); +BENCHMARK(BM_Bitset_ctor_ull)->DenseRange(1, 63); BENCHMARK_MAIN(); diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp index 320ef57..1bd5792 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -48,12 +46,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add(&fun, 0); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add(&fun, 0); } } 
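Aside (editorial note, not part of the patch): the loosened expected-error patterns in these verify tests accompany the __cxx03 <atomic> header changes above, which move value_type into __atomic_base and give difference_type only to the specializations that actually support arithmetic. A rough sketch of the resulting typedef layout, using hypothetical class names in place of libc++'s internal ones:

#include <cstddef>
#include <type_traits>

// Illustrative stand-ins for __atomic_base<T, false>,
// __atomic_base<T, true>, and atomic<T*>.
template <class T>
struct atomic_base {
  using value_type = T;  // every atomic now exposes value_type via its base
};

template <class T>
struct atomic_integral : atomic_base<T> {
  // Integral atomics count in units of their own value_type.
  using difference_type = typename atomic_base<T>::value_type;
};

template <class T>
struct atomic_pointer : atomic_base<T*> {
  using difference_type = std::ptrdiff_t;  // pointer arithmetic step
};

static_assert(std::is_same<atomic_integral<int>::difference_type, int>::value,
              "integral atomics reuse value_type as difference_type");

With this layout the primary template no longer advertises a blanket difference_type (which is what the previously XFAILed types.pass.cpp checks), and the exact requirement expression in the function-pointer diagnostics changed, which is presumably why the expected-error patterns above now match any requirement text instead of the old '!is_function<void (int)>::value' spelling.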
diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp index bdd8089..bdd4a83 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -51,12 +49,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add_explicit(&fun, 0, std::memory_order_relaxed); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add_explicit(&fun, 0, std::memory_order_relaxed); } } diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp index 2c9f898..105a010 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -48,12 +46,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub(&fun, 0); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub(&fun, 0); } } diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp index 88c4275..1647ed3 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp @@ -6,8 +6,6 @@ // 
//===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -51,12 +49,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub_explicit(&fun, 0, std::memory_order_relaxed); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub_explicit(&fun, 0, std::memory_order_relaxed); } } diff --git a/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp b/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp index 2b6c380..d6caa33 100644 --- a/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp +++ b/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp @@ -8,8 +8,6 @@ // <sstream> -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // How the constructors of basic_stringbuf initialize the buffer pointers is // not specified. For some constructors it's implementation defined whether the // pointers are set to nullptr. Libc++'s implementation directly uses the SSO diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp index fcbf649..283adbc 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp @@ -15,8 +15,6 @@ // UNSUPPORTED: no-wide-characters -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <fstream> std::basic_filebuf<char, std::char_traits<wchar_t> > f; diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp index 8eca76c..ba6f3c3 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp @@ -15,8 +15,6 @@ // UNSUPPORTED: no-wide-characters -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <fstream> std::basic_fstream<char, std::char_traits<wchar_t> > f; diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp index c979392..b1edec5 100644 --- a/libcxx/test/std/atomics/types.pass.cpp +++ b/libcxx/test/std/atomics/types.pass.cpp @@ -17,9 +17,6 @@ // typedef T value_type; // }; -// atomic still has a difference_type in the C++03 frozen headers -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <atomic> #include <chrono> #include <cstdint> diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp index 00de053..679eec2 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp +++ 
b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp @@ -11,8 +11,6 @@ // (bug report: https://llvm.org/PR58392) // Check that vector constructors don't leak memory when an operation inside the constructor throws an exception -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <cstddef> #include <memory> #include <type_traits> diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp index dac4396..d131f5c 100644 --- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp @@ -15,8 +15,6 @@ // basic_stringbuf() : basic_stringbuf(ios_base::in | ios_base::out) {} // C++20 // explicit basic_stringbuf(ios_base::openmode which); // C++20 -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <sstream> #include <cassert> diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp index 799a362..0184a38 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <regex> // template <class BidirectionalIterator, class Allocator, class charT, class traits> diff --git a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp index ecfdaee..cabd9eb 100644 --- a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp +++ b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp @@ -14,8 +14,6 @@ // template <class ST, class SA> // basic_regex(const basic_string<charT, ST, SA>& s); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <regex> #include <cassert> #include "test_macros.h" diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml index cac97a9..9367a8f 100644 --- a/libcxx/utils/ci/docker-compose.yml +++ b/libcxx/utils/ci/docker-compose.yml @@ -23,7 +23,7 @@ services: dockerfile: Dockerfile target: actions-builder args: - GITHUB_RUNNER_VERSION: "2.328.0" + GITHUB_RUNNER_VERSION: "2.329.0" <<: [*image_versions, *compiler_versions] android-buildkite-builder: diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index f88b021c..091903b 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "RelocScan.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -31,6 +32,9 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; + template <class RelTy> + void scanSectionImpl(InputSectionBase &, Relocs<RelTy>); + void scanSection(InputSectionBase &) override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const override; @@ -570,6 +574,123 @@ static uint64_t fixupCrossModeJump(Ctx &ctx, uint8_t *loc, RelType type, return val; } +template <class RelTy> +static RelType getMipsN32RelType(Ctx &ctx, RelTy *&rel, RelTy *end) { + uint32_t type = 0; + uint64_t offset = rel->r_offset; + int n = 0; + while (rel != end && rel->r_offset == offset) 
+ type |= (rel++)->getType(ctx.arg.isMips64EL) << (8 * n++); + return type; +} + +static RelType getMipsPairType(RelType type, bool isLocal) { + switch (type) { + case R_MIPS_HI16: + return R_MIPS_LO16; + case R_MIPS_GOT16: + // In case of global symbol, the R_MIPS_GOT16 relocation does not + // have a pair. Each global symbol has a unique entry in the GOT + // and a corresponding instruction with help of the R_MIPS_GOT16 + // relocation loads an address of the symbol. In case of local + // symbol, the R_MIPS_GOT16 relocation creates a GOT entry to hold + // the high 16 bits of the symbol's value. A paired R_MIPS_LO16 + // relocations handle low 16 bits of the address. That allows + // to allocate only one GOT entry for every 64 KiB of local data. + return isLocal ? R_MIPS_LO16 : R_MIPS_NONE; + case R_MICROMIPS_GOT16: + return isLocal ? R_MICROMIPS_LO16 : R_MIPS_NONE; + case R_MIPS_PCHI16: + return R_MIPS_PCLO16; + case R_MICROMIPS_HI16: + return R_MICROMIPS_LO16; + default: + return R_MIPS_NONE; + } +} + +template <class ELFT> +template <class RelTy> +void MIPS<ELFT>::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) { + RelocScan rs(ctx, &sec); + sec.relocations.reserve(rels.size()); + RelType type; + for (auto it = rels.begin(); it != rels.end();) { + const RelTy &rel = *it; + uint64_t offset = rel.r_offset; + if constexpr (ELFT::Is64Bits) { + type = it->getType(ctx.arg.isMips64EL); + ++it; + } else { + if (ctx.arg.mipsN32Abi) { + type = getMipsN32RelType(ctx, it, rels.end()); + } else { + type = it->getType(ctx.arg.isMips64EL); + ++it; + } + } + + uint32_t symIdx = rel.getSymbol(ctx.arg.isMips64EL); + Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx); + RelExpr expr = + ctx.target->getRelExpr(type, sym, sec.content().data() + rel.r_offset); + if (expr == R_NONE) + continue; + if (sym.isUndefined() && symIdx != 0 && + rs.maybeReportUndefined(cast<Undefined>(sym), offset)) + continue; + + auto addend = rs.getAddend<ELFT>(rel, type); + if (expr == RE_MIPS_GOTREL && sym.isLocal()) { + addend += sec.getFile<ELFT>()->mipsGp0; + } else if (!RelTy::HasAddend) { + // MIPS has an odd notion of "paired" relocations to calculate addends. + // For example, if a relocation is of R_MIPS_HI16, there must be a + // R_MIPS_LO16 relocation after that, and an addend is calculated using + // the two relocations. + RelType pairTy = getMipsPairType(type, sym.isLocal()); + if (pairTy != R_MIPS_NONE) { + const uint8_t *buf = sec.content().data(); + // To make things worse, paired relocations might not be contiguous in + // the relocation table, so we need to do linear search. 
*sigh* + bool found = false; + for (auto *ri = &rel; ri != rels.end(); ++ri) { + if (ri->getType(ctx.arg.isMips64EL) == pairTy && + ri->getSymbol(ctx.arg.isMips64EL) == symIdx) { + addend += ctx.target->getImplicitAddend(buf + ri->r_offset, pairTy); + found = true; + break; + } + } + + if (!found) + Warn(ctx) << "can't find matching " << pairTy << " relocation for " + << type; + } + } + + if (expr == RE_MIPS_TLSLD) { + ctx.in.mipsGot->addTlsIndex(*sec.file); + sec.addReloc({expr, type, offset, addend, &sym}); + } else if (expr == RE_MIPS_TLSGD) { + ctx.in.mipsGot->addDynTlsEntry(*sec.file, sym); + sec.addReloc({expr, type, offset, addend, &sym}); + } else { + if (expr == R_TPREL && rs.checkTlsLe(offset, sym, type)) + continue; + rs.process(expr, type, offset, sym, addend); + } + } +} + +template <class ELFT> void MIPS<ELFT>::scanSection(InputSectionBase &sec) { + auto relocs = sec.template relsOrRelas<ELFT>(); + if (relocs.areRelocsRel()) + scanSectionImpl(sec, relocs.rels); + else + scanSectionImpl(sec, relocs.relas); +} + template <class ELFT> void MIPS<ELFT>::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 550c091..6528cc5 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -8,6 +8,7 @@ #include "InputFiles.h" #include "OutputSections.h" +#include "RelocScan.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" @@ -178,6 +179,10 @@ public: uint64_t pltEntryAddr) const override; void writeIplt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; + template <class ELFT, class RelTy> + void scanSectionImpl(InputSectionBase &, Relocs<RelTy>); + template <class ELFT> void scanSection1(InputSectionBase &); + void scanSection(InputSectionBase &) override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; @@ -1257,6 +1262,132 @@ static bool isTocOptType(RelType type) { } } +// R_PPC64_TLSGD/R_PPC64_TLSLD is required to mark `bl __tls_get_addr` for +// General Dynamic/Local Dynamic code sequences. If a GD/LD GOT relocation is +// found but no R_PPC64_TLSGD/R_PPC64_TLSLD is seen, we assume that the +// instructions are generated by very old IBM XL compilers. Work around the +// issue by disabling GD/LD to IE/LE relaxation. +template <class RelTy> +static void checkPPC64TLSRelax(InputSectionBase &sec, Relocs<RelTy> rels) { + // Skip if sec is synthetic (sec.file is null) or if sec has been marked. 
+ if (!sec.file || sec.file->ppc64DisableTLSRelax) + return; + bool hasGDLD = false; + for (const RelTy &rel : rels) { + RelType type = rel.getType(false); + switch (type) { + case R_PPC64_TLSGD: + case R_PPC64_TLSLD: + return; // Found a marker + case R_PPC64_GOT_TLSGD16: + case R_PPC64_GOT_TLSGD16_HA: + case R_PPC64_GOT_TLSGD16_HI: + case R_PPC64_GOT_TLSGD16_LO: + case R_PPC64_GOT_TLSLD16: + case R_PPC64_GOT_TLSLD16_HA: + case R_PPC64_GOT_TLSLD16_HI: + case R_PPC64_GOT_TLSLD16_LO: + hasGDLD = true; + break; + } + } + if (hasGDLD) { + sec.file->ppc64DisableTLSRelax = true; + Warn(sec.file->ctx) + << sec.file + << ": disable TLS relaxation due to R_PPC64_GOT_TLS* relocations " + "without " + "R_PPC64_TLSGD/R_PPC64_TLSLD relocations"; + } +} + +template <class ELFT, class RelTy> +void PPC64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) { + RelocScan rs(ctx, &sec); + sec.relocations.reserve(rels.size()); + checkPPC64TLSRelax<RelTy>(sec, rels); + for (auto it = rels.begin(); it != rels.end(); ++it) { + const RelTy &rel = *it; + uint64_t offset = rel.r_offset; + uint32_t symIdx = rel.getSymbol(false); + Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx); + RelType type = rel.getType(false); + RelExpr expr = + ctx.target->getRelExpr(type, sym, sec.content().data() + offset); + if (expr == R_NONE) + continue; + if (sym.isUndefined() && symIdx != 0 && + rs.maybeReportUndefined(cast<Undefined>(sym), offset)) + continue; + + auto addend = getAddend<ELFT>(rel); + if (ctx.arg.isPic && type == R_PPC64_TOC) + addend += getPPC64TocBase(ctx); + + // We can separate the small code model relocations into 2 categories: + // 1) Those that access the compiler generated .toc sections. + // 2) Those that access the linker allocated got entries. + // lld allocates got entries to symbols on demand. Since we don't try to + // sort the got entries in any way, we don't have to track which objects + // have got-based small code model relocs. The .toc sections get placed + // after the end of the linker allocated .got section and we do sort those + // so sections addressed with small code model relocations come first. + if (type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS) + sec.file->ppc64SmallCodeModelTocRelocs = true; + + // Record the TOC entry (.toc + addend) as not relaxable. See the comment in + // PPC64::relocateAlloc(). + if (type == R_PPC64_TOC16_LO && sym.isSection() && isa<Defined>(sym) && + cast<Defined>(sym).section->name == ".toc") + ctx.ppc64noTocRelax.insert({&sym, addend}); + + if ((type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) || + (type == R_PPC64_TLSLD && expr == R_TLSLD_HINT)) { + auto it1 = it; + ++it1; + if (it1 == rels.end()) { + auto diag = Err(ctx); + diag << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " + "relocation"; + printLocation(diag, sec, sym, offset); + continue; + } + + // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC + // case, so we can discern it later from the toc-case. 
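+      // The relocated instruction is 4-byte aligned, so the low bit of the
+      // recorded offset is otherwise always zero and can carry this flag
+      // without losing information.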
+      if (it1->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC)
+        ++offset;
+    }
+
+    if (oneof<R_GOTREL, RE_PPC64_TOCBASE, RE_PPC64_RELAX_TOC>(expr))
+      ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
+
+    if (sym.isTls()) {
+      if (unsigned processed =
+              rs.handleTlsRelocation(expr, type, offset, sym, addend)) {
+        it += processed - 1;
+        continue;
+      }
+    }
+    rs.process(expr, type, offset, sym, addend);
+  }
+}
+
+template <class ELFT> void PPC64::scanSection1(InputSectionBase &sec) {
+  auto relocs = sec.template relsOrRelas<ELFT>();
+  if (relocs.areRelocsCrel())
+    scanSectionImpl<ELFT>(sec, relocs.crels);
+  else
+    scanSectionImpl<ELFT>(sec, relocs.relas);
+}
+
+void PPC64::scanSection(InputSectionBase &sec) {
+  if (ctx.arg.isLE)
+    scanSection1<ELF64LE>(sec);
+  else
+    scanSection1<ELF64BE>(sec);
+}
+
 void PPC64::relocate(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const {
   RelType type = rel.type;
   bool shouldTocOptimize = isTocOptType(type);
diff --git a/lld/ELF/RelocScan.h b/lld/ELF/RelocScan.h
new file mode 100644
index 0000000..43f337d
--- /dev/null
+++ b/lld/ELF/RelocScan.h
@@ -0,0 +1,126 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_RELOCSCAN_H
+#define LLD_ELF_RELOCSCAN_H
+
+#include "Config.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "Relocations.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+
+namespace lld::elf {
+
+// Build a bitmask with one bit set for each of the given RelExprs that lies
+// in the [0, 64) window; oneof() below shifts values in [64, 128) into this
+// window first.
+inline constexpr uint64_t buildMask() { return 0; }
+
+template <typename... Tails>
+inline constexpr uint64_t buildMask(int head, Tails... tails) {
+  return (0 <= head && head < 64 ? uint64_t(1) << head : 0) |
+         buildMask(tails...);
+}
+
+// Return true if `Expr` is one of `Exprs`.
+// There are more than 64 but less than 128 RelExprs, so we divide the set of
+// exprs into [0, 64) and [64, 128) and represent each range as a constant
+// 64-bit mask. Then we decide which mask to test depending on the value of
+// expr and use a simple shift and bitwise-and to test for membership.
+template <RelExpr... Exprs> bool oneof(RelExpr expr) {
+  assert(0 <= expr && (int)expr < 128 &&
+         "RelExpr is too large for 128-bit mask!");
+
+  if (expr >= 64)
+    return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...);
+  return (uint64_t(1) << expr) & buildMask(Exprs...);
+}
+
+// This class encapsulates the state needed to scan relocations for one
+// InputSectionBase.
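+//
+// A sketch of the typical driver loop (cf. TargetInfo::scanSectionImpl in
+// Relocations.cpp):
+//
+//   RelocScan rs(ctx, &sec);
+//   for (auto it = rels.begin(); it != rels.end(); ++it) {
+//     RelType type = it->getType(false);
+//     rs.scan<ELFT, RelTy>(it, type, rs.getAddend<ELFT>(*it, type));
+//   }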
+class RelocScan {
+public:
+  Ctx &ctx;
+  InputSectionBase *sec;
+
+  RelocScan(Ctx &ctx, InputSectionBase *sec = nullptr) : ctx(ctx), sec(sec) {}
+  template <class ELFT, class RelTy>
+  void scan(typename Relocs<RelTy>::const_iterator &i, RelType type,
+            int64_t addend);
+  void scanEhSection(EhInputSection &s);
+
+  template <class ELFT, class RelTy>
+  int64_t getAddend(const RelTy &r, RelType type);
+  bool maybeReportUndefined(Undefined &sym, uint64_t offset);
+  bool checkTlsLe(uint64_t offset, Symbol &sym, RelType type);
+  bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym,
+                                uint64_t relOff) const;
+  void process(RelExpr expr, RelType type, uint64_t offset, Symbol &sym,
+               int64_t addend) const;
+  unsigned handleTlsRelocation(RelExpr expr, RelType type, uint64_t offset,
+                               Symbol &sym, int64_t addend);
+};
+
+template <class ELFT, class RelTy>
+int64_t RelocScan::getAddend(const RelTy &r, RelType type) {
+  return RelTy::HasAddend ? elf::getAddend<ELFT>(r)
+                          : ctx.target->getImplicitAddend(
+                                sec->content().data() + r.r_offset, type);
+}
+
+template <class ELFT, class RelTy>
+void RelocScan::scan(typename Relocs<RelTy>::const_iterator &it, RelType type,
+                     int64_t addend) {
+  const RelTy &rel = *it;
+  uint32_t symIdx = rel.getSymbol(false);
+  Symbol &sym = sec->getFile<ELFT>()->getSymbol(symIdx);
+  uint64_t offset = rel.r_offset;
+  RelExpr expr =
+      ctx.target->getRelExpr(type, sym, sec->content().data() + offset);
+
+  // Ignore R_*_NONE and other marker relocations.
+  if (expr == R_NONE)
+    return;
+
+  // Error if the target symbol is undefined. Symbol index 0 may be used by
+  // marker relocations, e.g. R_*_NONE and R_ARM_V4BX. Don't error on them.
+  if (sym.isUndefined() && symIdx != 0 &&
+      maybeReportUndefined(cast<Undefined>(sym), offset))
+    return;
+
+  // Ensure GOT or GOTPLT is created for relocations that reference their base
+  // addresses without directly creating entries.
+  if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT,
+            R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
+    ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+  } else if (oneof<R_GOTONLY_PC, R_GOTREL, RE_PPC32_PLTREL>(expr)) {
+    ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
+  }
+
+  // Process TLS relocations, including TLS optimizations. Note that
+  // R_TPREL and R_TPREL_NEG relocations are resolved in process().
+  //
+  // Some RISCV TLSDESC relocations reference a local NOTYPE symbol,
+  // but we need to process them in handleTlsRelocation.
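+  // handleTlsRelocation returns the number of relocation records it consumed
+  // (a TLS code sequence may span several); advancing the caller's iterator
+  // by processed - 1 makes the enclosing loop's own increment land on the
+  // first unconsumed record.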
+ if (sym.isTls() || oneof<R_TLSDESC_PC, R_TLSDESC_CALL>(expr)) { + if (unsigned processed = + handleTlsRelocation(expr, type, offset, sym, addend)) { + it += processed - 1; + return; + } + } + + process(expr, type, offset, sym, addend); +} +} // namespace lld::elf + +#endif diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 84b9b5e..d21376f 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -30,6 +30,7 @@ #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" +#include "RelocScan.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" @@ -58,8 +59,8 @@ static void printDefinedLocation(ELFSyncStream &s, const Symbol &sym) { // >>> defined in /home/alice/src/foo.o // >>> referenced by bar.c:12 (/home/alice/src/bar.c:12) // >>> /home/alice/src/bar.o:(.text+0x1) -static void printLocation(ELFSyncStream &s, InputSectionBase &sec, - const Symbol &sym, uint64_t off) { +void elf::printLocation(ELFSyncStream &s, InputSectionBase &sec, + const Symbol &sym, uint64_t off) { printDefinedLocation(s, sym); s << "\n>>> referenced by "; auto tell = s.tell(); @@ -111,54 +112,6 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, int64_t v, int n, } } -// Build a bitmask with one bit set for each 64 subset of RelExpr. -static constexpr uint64_t buildMask() { return 0; } - -template <typename... Tails> -static constexpr uint64_t buildMask(int head, Tails... tails) { - return (0 <= head && head < 64 ? uint64_t(1) << head : 0) | - buildMask(tails...); -} - -// Return true if `Expr` is one of `Exprs`. -// There are more than 64 but less than 128 RelExprs, so we divide the set of -// exprs into [0, 64) and [64, 128) and represent each range as a constant -// 64-bit mask. Then we decide which mask to test depending on the value of -// expr and use a simple shift and bitwise-and to test for membership. -template <RelExpr... Exprs> static bool oneof(RelExpr expr) { - assert(0 <= expr && (int)expr < 128 && - "RelExpr is too large for 128-bit mask!"); - - if (expr >= 64) - return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...); - return (uint64_t(1) << expr) & buildMask(Exprs...); -} - -static RelType getMipsPairType(RelType type, bool isLocal) { - switch (type) { - case R_MIPS_HI16: - return R_MIPS_LO16; - case R_MIPS_GOT16: - // In case of global symbol, the R_MIPS_GOT16 relocation does not - // have a pair. Each global symbol has a unique entry in the GOT - // and a corresponding instruction with help of the R_MIPS_GOT16 - // relocation loads an address of the symbol. In case of local - // symbol, the R_MIPS_GOT16 relocation creates a GOT entry to hold - // the high 16 bits of the symbol's value. A paired R_MIPS_LO16 - // relocations handle low 16 bits of the address. That allows - // to allocate only one GOT entry for every 64 KiB of local data. - return isLocal ? R_MIPS_LO16 : R_MIPS_NONE; - case R_MICROMIPS_GOT16: - return isLocal ? R_MICROMIPS_LO16 : R_MIPS_NONE; - case R_MIPS_PCHI16: - return R_MIPS_PCLO16; - case R_MICROMIPS_HI16: - return R_MICROMIPS_LO16; - default: - return R_MIPS_NONE; - } -} - // True if non-preemptable symbol always has the same value regardless of where // the DSO is loaded. bool elf::isAbsolute(const Symbol &sym) { @@ -424,73 +377,8 @@ private: ArrayRef<EhSectionPiece> cies, fdes; ArrayRef<EhSectionPiece>::iterator i, j; }; - -// This class encapsulates states needed to scan relocations for one -// InputSectionBase. 
-class RelocationScanner { -public: - RelocationScanner(Ctx &ctx) : ctx(ctx) {} - template <class ELFT> void scanSection(InputSectionBase &s); - template <class ELFT> void scanEhSection(EhInputSection &s); - -private: - Ctx &ctx; - InputSectionBase *sec; - - // End of relocations, used by Mips/PPC64. - const void *end = nullptr; - - template <class RelTy> RelType getMipsN32RelType(RelTy *&rel) const; - template <class ELFT, class RelTy> - int64_t computeMipsAddend(const RelTy &rel, RelExpr expr, bool isLocal) const; - bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, - uint64_t relOff) const; - void process(RelExpr expr, RelType type, uint64_t offset, Symbol &sym, - int64_t addend) const; - unsigned handleTlsRelocation(RelExpr expr, RelType type, uint64_t offset, - Symbol &sym, int64_t addend); - - template <class ELFT, class RelTy> - void scan(typename Relocs<RelTy>::const_iterator &i); - template <class ELFT, class RelTy> void scanSectionImpl(Relocs<RelTy> rels); -}; } // namespace -// MIPS has an odd notion of "paired" relocations to calculate addends. -// For example, if a relocation is of R_MIPS_HI16, there must be a -// R_MIPS_LO16 relocation after that, and an addend is calculated using -// the two relocations. -template <class ELFT, class RelTy> -int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr, - bool isLocal) const { - if (expr == RE_MIPS_GOTREL && isLocal) - return sec->getFile<ELFT>()->mipsGp0; - - // The ABI says that the paired relocation is used only for REL. - // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - // This generalises to relocation types with implicit addends. - if (RelTy::HasAddend) - return 0; - - RelType type = rel.getType(ctx.arg.isMips64EL); - RelType pairTy = getMipsPairType(type, isLocal); - if (pairTy == R_MIPS_NONE) - return 0; - - const uint8_t *buf = sec->content().data(); - uint32_t symIndex = rel.getSymbol(ctx.arg.isMips64EL); - - // To make things worse, paired relocations might not be contiguous in - // the relocation table, so we need to do linear search. *sigh* - for (const RelTy *ri = &rel; ri != static_cast<const RelTy *>(end); ++ri) - if (ri->getType(ctx.arg.isMips64EL) == pairTy && - ri->getSymbol(ctx.arg.isMips64EL) == symIndex) - return ctx.target->getImplicitAddend(buf + ri->r_offset, pairTy); - - Warn(ctx) << "can't find matching " << pairTy << " relocation for " << type; - return 0; -} - // Custom error message if Sym is defined in a discarded section. template <class ELFT> static void maybeReportDiscarded(Ctx &ctx, ELFSyncStream &msg, Undefined &sym) { @@ -767,13 +655,12 @@ void elf::reportUndefinedSymbols(Ctx &ctx) { // Report an undefined symbol if necessary. // Returns true if the undefined symbol will produce an error message. -static bool maybeReportUndefined(Ctx &ctx, Undefined &sym, - InputSectionBase &sec, uint64_t offset) { +bool RelocScan::maybeReportUndefined(Undefined &sym, uint64_t offset) { std::lock_guard<std::mutex> lock(ctx.relocMutex); // If versioned, issue an error (even if the symbol is weak) because we don't // know the defining filename which is required to construct a Verneed entry. if (sym.hasVersionSuffix) { - ctx.undefErrs.push_back({&sym, {{&sec, offset}}, false}); + ctx.undefErrs.push_back({&sym, {{sec, offset}}, false}); return true; } if (sym.isWeak()) @@ -792,30 +679,24 @@ static bool maybeReportUndefined(Ctx &ctx, Undefined &sym, // PPC32 .got2 is similar but cannot be fixed. 
Multiple .got2 is infeasible // because .LC0-.LTOC is not representable if the two labels are in different // .got2 - if (sym.discardedSecIdx != 0 && (sec.name == ".got2" || sec.name == ".toc")) + if (sym.discardedSecIdx != 0 && (sec->name == ".got2" || sec->name == ".toc")) return false; bool isWarning = (ctx.arg.unresolvedSymbols == UnresolvedPolicy::Warn && canBeExternal) || ctx.arg.noinhibitExec; - ctx.undefErrs.push_back({&sym, {{&sec, offset}}, isWarning}); + ctx.undefErrs.push_back({&sym, {{sec, offset}}, isWarning}); return !isWarning; } -// MIPS N32 ABI treats series of successive relocations with the same offset -// as a single relocation. The similar approach used by N64 ABI, but this ABI -// packs all relocations into the single relocation record. Here we emulate -// this for the N32 ABI. Iterate over relocation with the same offset and put -// theirs types into the single bit-set. -template <class RelTy> -RelType RelocationScanner::getMipsN32RelType(RelTy *&rel) const { - uint32_t type = 0; - uint64_t offset = rel->r_offset; - - int n = 0; - while (rel != static_cast<const RelTy *>(end) && rel->r_offset == offset) - type |= (rel++)->getType(ctx.arg.isMips64EL) << (8 * n++); - return type; +bool RelocScan::checkTlsLe(uint64_t offset, Symbol &sym, RelType type) { + if (!ctx.arg.shared) + return false; + auto diag = Err(ctx); + diag << "relocation " << type << " against " << &sym + << " cannot be used with -shared"; + printLocation(diag, *sec, sym, offset); + return true; } template <bool shard = false> @@ -949,9 +830,9 @@ static bool canDefineSymbolInExecutable(Ctx &ctx, Symbol &sym) { // // If this function returns false, that means we need to emit a // dynamic relocation so that the relocation will be fixed at load-time. -bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, - const Symbol &sym, - uint64_t relOff) const { +bool RelocScan::isStaticLinkTimeConstant(RelExpr e, RelType type, + const Symbol &sym, + uint64_t relOff) const { // These expressions always compute a constant if (oneof< R_GOTPLT, R_GOT_OFF, R_RELAX_HINT, RE_MIPS_GOT_LOCAL_PAGE, @@ -1010,7 +891,7 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, // We set the final symbols values for linker script defined symbols later. // They always can be computed as a link time constant. if (sym.scriptDefined) - return true; + return true; auto diag = Err(ctx); diag << "relocation " << type << " cannot refer to absolute symbol: " << &sym; @@ -1031,8 +912,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, // sections. Given that it is ro, we will need an extra PT_LOAD. This // complicates things for the dynamic linker and means we would have to reserve // space for the extra PT_LOAD even if we end up not using it. -void RelocationScanner::process(RelExpr expr, RelType type, uint64_t offset, - Symbol &sym, int64_t addend) const { +void RelocScan::process(RelExpr expr, RelType type, uint64_t offset, + Symbol &sym, int64_t addend) const { // If non-ifunc non-preemptible, change PLT to direct call and optimize GOT // indirection. const bool isIfunc = sym.isGnuIFunc(); @@ -1243,28 +1124,6 @@ void RelocationScanner::process(RelExpr expr, RelType type, uint64_t offset, printLocation(diag, *sec, sym, offset); } -// This function is similar to the `handleTlsRelocation`. 
MIPS does not -// support any relaxations for TLS relocations so by factoring out MIPS -// handling in to the separate function we can simplify the code and do not -// pollute other `handleTlsRelocation` by MIPS `ifs` statements. -// Mips has a custom MipsGotSection that handles the writing of GOT entries -// without dynamic relocations. -static unsigned handleMipsTlsRelocation(Ctx &ctx, RelType type, Symbol &sym, - InputSectionBase &c, uint64_t offset, - int64_t addend, RelExpr expr) { - if (expr == RE_MIPS_TLSLD) { - ctx.in.mipsGot->addTlsIndex(*c.file); - c.addReloc({expr, type, offset, addend, &sym}); - return 1; - } - if (expr == RE_MIPS_TLSGD) { - ctx.in.mipsGot->addDynTlsEntry(*c.file, sym); - c.addReloc({expr, type, offset, addend, &sym}); - return 1; - } - return 0; -} - static unsigned handleAArch64PAuthTlsRelocation(InputSectionBase *sec, RelExpr expr, RelType type, uint64_t offset, Symbol &sym, @@ -1293,9 +1152,9 @@ static unsigned handleAArch64PAuthTlsRelocation(InputSectionBase *sec, // symbol in TLS block. // // Returns the number of relocations processed. -unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, - uint64_t offset, Symbol &sym, - int64_t addend) { +unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type, + uint64_t offset, Symbol &sym, + int64_t addend) { bool isAArch64 = ctx.arg.emachine == EM_AARCH64; if (isAArch64) @@ -1303,19 +1162,8 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, sec, expr, type, offset, sym, addend)) return processed; - if (expr == R_TPREL || expr == R_TPREL_NEG) { - if (ctx.arg.shared) { - auto diag = Err(ctx); - diag << "relocation " << type << " against " << &sym - << " cannot be used with -shared"; - printLocation(diag, *sec, sym, offset); - return 1; - } - return 0; - } - - if (ctx.arg.emachine == EM_MIPS) - return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr); + if (expr == R_TPREL || expr == R_TPREL_NEG) + return checkTlsLe(offset, sym, type) ? 1 : 0; bool isRISCV = ctx.arg.emachine == EM_RISCV; @@ -1472,159 +1320,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, } template <class ELFT, class RelTy> -void RelocationScanner::scan(typename Relocs<RelTy>::const_iterator &i) { - const RelTy &rel = *i; - uint32_t symIndex = rel.getSymbol(ctx.arg.isMips64EL); - Symbol &sym = sec->getFile<ELFT>()->getSymbol(symIndex); - RelType type; - if constexpr (ELFT::Is64Bits || RelTy::IsCrel) { - type = rel.getType(ctx.arg.isMips64EL); - ++i; - } else { - // CREL is unsupported for MIPS N32. - if (ctx.arg.mipsN32Abi) { - type = getMipsN32RelType(i); - } else { - type = rel.getType(ctx.arg.isMips64EL); - ++i; - } - } - // Get an offset in an output section this relocation is applied to. - uint64_t offset = rel.r_offset; - - RelExpr expr = - ctx.target->getRelExpr(type, sym, sec->content().data() + offset); - int64_t addend = RelTy::HasAddend - ? getAddend<ELFT>(rel) - : ctx.target->getImplicitAddend( - sec->content().data() + rel.r_offset, type); - if (LLVM_UNLIKELY(ctx.arg.emachine == EM_MIPS)) - addend += computeMipsAddend<ELFT>(rel, expr, sym.isLocal()); - else if (ctx.arg.emachine == EM_PPC64 && ctx.arg.isPic && type == R_PPC64_TOC) - addend += getPPC64TocBase(ctx); - - // Ignore R_*_NONE and other marker relocations. - if (expr == R_NONE) - return; - - // Error if the target symbol is undefined. Symbol index 0 may be used by - // marker relocations, e.g. R_*_NONE and R_ARM_V4BX. Don't error on them. 
- if (sym.isUndefined() && symIndex != 0 && - maybeReportUndefined(ctx, cast<Undefined>(sym), *sec, offset)) - return; - - if (ctx.arg.emachine == EM_PPC64) { - // We can separate the small code model relocations into 2 categories: - // 1) Those that access the compiler generated .toc sections. - // 2) Those that access the linker allocated got entries. - // lld allocates got entries to symbols on demand. Since we don't try to - // sort the got entries in any way, we don't have to track which objects - // have got-based small code model relocs. The .toc sections get placed - // after the end of the linker allocated .got section and we do sort those - // so sections addressed with small code model relocations come first. - if (type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS) - sec->file->ppc64SmallCodeModelTocRelocs = true; - - // Record the TOC entry (.toc + addend) as not relaxable. See the comment in - // PPC64::relocateAlloc(). - if (type == R_PPC64_TOC16_LO && sym.isSection() && isa<Defined>(sym) && - cast<Defined>(sym).section->name == ".toc") - ctx.ppc64noTocRelax.insert({&sym, addend}); - - if ((type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) || - (type == R_PPC64_TLSLD && expr == R_TLSLD_HINT)) { - // Skip the error check for CREL, which does not set `end`. - if constexpr (!RelTy::IsCrel) { - if (i == end) { - auto diag = Err(ctx); - diag << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " - "relocation"; - printLocation(diag, *sec, sym, offset); - return; - } - } - - // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC - // case, so we can discern it later from the toc-case. - if (i->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC) - ++offset; - } - } - - // If the relocation does not emit a GOT or GOTPLT entry but its computation - // uses their addresses, we need GOT or GOTPLT to be created. - // - // The 5 types that relative GOTPLT are all x86 and x86-64 specific. - if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT, - R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) { - ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed); - } else if (oneof<R_GOTONLY_PC, R_GOTREL, RE_PPC32_PLTREL, RE_PPC64_TOCBASE, - RE_PPC64_RELAX_TOC>(expr)) { - ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed); - } - - // Process TLS relocations, including TLS optimizations. Note that - // R_TPREL and R_TPREL_NEG relocations are resolved in process(). - // - // Some RISCV TLSDESC relocations reference a local NOTYPE symbol, - // but we need to process them in handleTlsRelocation. - if (sym.isTls() || oneof<R_TLSDESC_PC, R_TLSDESC_CALL>(expr)) { - if (unsigned processed = - handleTlsRelocation(expr, type, offset, sym, addend)) { - i += processed - 1; - return; - } - } - - process(expr, type, offset, sym, addend); -} - -// R_PPC64_TLSGD/R_PPC64_TLSLD is required to mark `bl __tls_get_addr` for -// General Dynamic/Local Dynamic code sequences. If a GD/LD GOT relocation is -// found but no R_PPC64_TLSGD/R_PPC64_TLSLD is seen, we assume that the -// instructions are generated by very old IBM XL compilers. Work around the -// issue by disabling GD/LD to IE/LE relaxation. -template <class RelTy> -static void checkPPC64TLSRelax(InputSectionBase &sec, Relocs<RelTy> rels) { - // Skip if sec is synthetic (sec.file is null) or if sec has been marked. 
- if (!sec.file || sec.file->ppc64DisableTLSRelax) - return; - bool hasGDLD = false; - for (const RelTy &rel : rels) { - RelType type = rel.getType(false); - switch (type) { - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - return; // Found a marker - case R_PPC64_GOT_TLSGD16: - case R_PPC64_GOT_TLSGD16_HA: - case R_PPC64_GOT_TLSGD16_HI: - case R_PPC64_GOT_TLSGD16_LO: - case R_PPC64_GOT_TLSLD16: - case R_PPC64_GOT_TLSLD16_HA: - case R_PPC64_GOT_TLSLD16_HI: - case R_PPC64_GOT_TLSLD16_LO: - hasGDLD = true; - break; - } - } - if (hasGDLD) { - sec.file->ppc64DisableTLSRelax = true; - Warn(sec.file->ctx) - << sec.file - << ": disable TLS relaxation due to R_PPC64_GOT_TLS* relocations " - "without " - "R_PPC64_TLSGD/R_PPC64_TLSLD relocations"; - } -} - -template <class ELFT, class RelTy> -void RelocationScanner::scanSectionImpl(Relocs<RelTy> rels) { - // Not all relocations end up in Sec->Relocations, but a lot do. - sec->relocations.reserve(rels.size()); - - if (ctx.arg.emachine == EM_PPC64) - checkPPC64TLSRelax<RelTy>(*sec, rels); +void TargetInfo::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) { + RelocScan rs(ctx, &sec); + // Many relocations end up in sec.relocations. + sec.relocations.reserve(rels.size()); // On SystemZ, all sections need to be sorted by r_offset, to allow TLS // relaxation to be handled correctly - see SystemZ::getTlsGdRelaxSkip. @@ -1632,40 +1331,38 @@ void RelocationScanner::scanSectionImpl(Relocs<RelTy> rels) { if (ctx.arg.emachine == EM_S390) rels = sortRels(rels, storage); - if constexpr (RelTy::IsCrel) { - for (auto i = rels.begin(); i != rels.end();) - scan<ELFT, RelTy>(i); - } else { - // The non-CREL code path has additional check for PPC64 TLS. - end = static_cast<const void *>(rels.end()); - for (auto i = rels.begin(); i != end;) - scan<ELFT, RelTy>(i); + for (auto it = rels.begin(); it != rels.end(); ++it) { + auto type = it->getType(false); + rs.scan<ELFT, RelTy>(it, type, rs.getAddend<ELFT>(*it, type)); } // Sort relocations by offset for more efficient searching for // R_RISCV_PCREL_HI20, ALIGN relocations, R_PPC64_ADDR64 and the // branch-to-branch optimization. 
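+  // stable_sort below preserves the relative order of relocations that share
+  // an r_offset, so records applied to the same location keep their original
+  // input order.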
if (is_contained({EM_RISCV, EM_LOONGARCH}, ctx.arg.emachine) || - (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") || + (ctx.arg.emachine == EM_PPC64 && sec.name == ".toc") || ctx.arg.branchToBranch) - llvm::stable_sort(sec->relocs(), + llvm::stable_sort(sec.relocs(), [](const Relocation &lhs, const Relocation &rhs) { return lhs.offset < rhs.offset; }); } -template <class ELFT> void RelocationScanner::scanSection(InputSectionBase &s) { - sec = &s; - const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>(); +template <class ELFT> void TargetInfo::scanSection1(InputSectionBase &sec) { + const RelsOrRelas<ELFT> rels = sec.template relsOrRelas<ELFT>(); if (rels.areRelocsCrel()) - scanSectionImpl<ELFT>(rels.crels); + scanSectionImpl<ELFT>(sec, rels.crels); else if (rels.areRelocsRel()) - scanSectionImpl<ELFT>(rels.rels); + scanSectionImpl<ELFT>(sec, rels.rels); else - scanSectionImpl<ELFT>(rels.relas); + scanSectionImpl<ELFT>(sec, rels.relas); +} + +void TargetInfo::scanSection(InputSectionBase &sec) { + invokeELFT(scanSection1, sec); } -template <class ELFT> void RelocationScanner::scanEhSection(EhInputSection &s) { +void RelocScan::scanEhSection(EhInputSection &s) { sec = &s; OffsetGetter getter(s); auto rels = s.rels; @@ -1680,7 +1377,7 @@ template <class ELFT> void RelocationScanner::scanEhSection(EhInputSection &s) { continue; Symbol *sym = r.sym; if (sym->isUndefined() && - maybeReportUndefined(ctx, cast<Undefined>(*sym), *sec, offset)) + maybeReportUndefined(cast<Undefined>(*sym), offset)) continue; process(r.expr, r.type, offset, *sym, r.addend); } @@ -1701,12 +1398,11 @@ template <class ELFT> void elf::scanRelocations(Ctx &ctx) { auto outerFn = [&]() { for (ELFFileBase *f : ctx.objectFiles) { auto fn = [f, &ctx]() { - RelocationScanner scanner(ctx); for (InputSectionBase *s : f->getSections()) { if (s && s->kind() == SectionBase::Regular && s->isLive() && (s->flags & SHF_ALLOC) && !(s->type == SHT_ARM_EXIDX && ctx.arg.emachine == EM_ARM)) - scanner.template scanSection<ELFT>(*s); + ctx.target->scanSection(*s); } }; if (serial) @@ -1715,14 +1411,14 @@ template <class ELFT> void elf::scanRelocations(Ctx &ctx) { tg.spawn(fn); } auto scanEH = [&] { - RelocationScanner scanner(ctx); + RelocScan scanner(ctx); for (Partition &part : ctx.partitions) { for (EhInputSection *sec : part.ehFrame->sections) - scanner.template scanEhSection<ELFT>(*sec); + scanner.scanEhSection(*sec); if (part.armExidx && part.armExidx->isLive()) for (InputSection *sec : part.armExidx->exidxSections) if (sec->isLive()) - scanner.template scanSection<ELFT>(*sec); + ctx.target->scanSection(*sec); } }; if (serial) diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 7ea03c3..86ca298 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -17,7 +17,9 @@ namespace lld::elf { struct Ctx; +struct ELFSyncStream; class Defined; +class Undefined; class Symbol; class InputSection; class InputSectionBase; @@ -153,12 +155,17 @@ struct JumpInstrMod { unsigned size; }; +void printLocation(ELFSyncStream &s, InputSectionBase &sec, const Symbol &sym, + uint64_t off); + // This function writes undefined symbol diagnostics to an internal buffer. // Call reportUndefinedSymbols() after calling scanRelocations() to emit // the diagnostics. 
template <class ELFT> void scanRelocations(Ctx &ctx); template <class ELFT> void checkNoCrossRefs(Ctx &ctx); void reportUndefinedSymbols(Ctx &); +bool maybeReportUndefined(Ctx &, Undefined &sym, InputSectionBase &sec, + uint64_t offset); void postScanRelocations(Ctx &ctx); void addGotEntry(Ctx &ctx, Symbol &sym); diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index fb79ee9..89e4dbe 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -26,6 +26,7 @@ #include "Target.h" #include "InputFiles.h" #include "OutputSections.h" +#include "RelocScan.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 9f06051..90d8ddf 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -24,6 +24,7 @@ namespace elf { class Defined; class InputFile; class Symbol; +template <class RelTy> struct Relocs; std::string toStr(Ctx &, RelType type); @@ -87,6 +88,15 @@ public: virtual bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const; + // Function for scanning relocation. Typically overridden by targets that + // require special type or addend adjustment. + virtual void scanSection(InputSectionBase &); + // Called by scanSection as a default implementation for specific ELF + // relocation types. + template <class ELFT> void scanSection1(InputSectionBase &); + template <class ELFT, class RelTy> + void scanSectionImpl(InputSectionBase &, Relocs<RelTy>); + virtual void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const = 0; void relocateNoSym(uint8_t *loc, RelType type, uint64_t val) const { diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index 53e991a..1a2860a 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -1,3 +1,4 @@ +from abc import ABC, abstractmethod import ctypes import errno import io @@ -5,6 +6,7 @@ import threading import socket import traceback from lldbsuite.support import seven +from typing import Optional, List, Tuple def checksum(message): @@ -86,7 +88,7 @@ class MockGDBServerResponder: handles any packet not recognized in the common packet handling code. """ - registerCount = 40 + registerCount: int = 40 class RESPONSE_DISCONNECT: pass @@ -95,7 +97,7 @@ class MockGDBServerResponder: pass def __init__(self): - self.packetLog = [] + self.packetLog: List[str] = [] def respond(self, packet): """ @@ -241,7 +243,7 @@ class MockGDBServerResponder: def qHostInfo(self): return "ptrsize:8;endian:little;" - def qEcho(self): + def qEcho(self, num: int): return "E04" def qQueryGDBServer(self): @@ -262,10 +264,10 @@ class MockGDBServerResponder: def D(self, packet): return "OK" - def readRegisters(self): + def readRegisters(self) -> str: return "00000000" * self.registerCount - def readRegister(self, register): + def readRegister(self, register: int) -> str: return "00000000" def writeRegisters(self, registers_hex): @@ -305,7 +307,9 @@ class MockGDBServerResponder: # SIGINT is 2, return type is 2 digit hex string return "S02" - def qXferRead(self, obj, annex, offset, length): + def qXferRead( + self, obj: str, annex: str, offset: int, length: int + ) -> Tuple[Optional[str], bool]: return None, False def _qXferResponse(self, data, has_more): @@ -373,15 +377,17 @@ class MockGDBServerResponder: pass -class ServerChannel: +class ServerChannel(ABC): """ A wrapper class for TCP or pty-based server. 
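+
+    Concrete subclasses implement the abstract get_connect_address,
+    get_connect_url, recv, and sendall methods below; close_server and
+    close_connection default to doing nothing.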
""" - def get_connect_address(self): + @abstractmethod + def get_connect_address(self) -> str: """Get address for the client to connect to.""" - def get_connect_url(self): + @abstractmethod + def get_connect_url(self) -> str: """Get URL suitable for process connect command.""" def close_server(self): @@ -393,10 +399,12 @@ class ServerChannel: def close_connection(self): """Close all resources used by the accepted connection.""" - def recv(self): + @abstractmethod + def recv(self) -> bytes: """Receive a data packet from the connected client.""" - def sendall(self, data): + @abstractmethod + def sendall(self, data: bytes) -> None: """Send the data to the connected client.""" @@ -427,11 +435,11 @@ class ServerSocket(ServerChannel): self._connection.close() self._connection = None - def recv(self): + def recv(self) -> bytes: assert self._connection is not None return self._connection.recv(4096) - def sendall(self, data): + def sendall(self, data: bytes) -> None: assert self._connection is not None return self._connection.sendall(data) @@ -443,10 +451,10 @@ class TCPServerSocket(ServerSocket): )[0] super().__init__(family, type, proto, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return "[{}]:{}".format(*self._server_socket.getsockname()) - def get_connect_url(self): + def get_connect_url(self) -> str: return "connect://" + self.get_connect_address() @@ -454,10 +462,10 @@ class UnixServerSocket(ServerSocket): def __init__(self, addr): super().__init__(socket.AF_UNIX, socket.SOCK_STREAM, 0, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return self._server_socket.getsockname() - def get_connect_url(self): + def get_connect_url(self) -> str: return "unix-connect://" + self.get_connect_address() @@ -471,7 +479,7 @@ class PtyServerSocket(ServerChannel): self._primary = io.FileIO(primary, "r+b") self._secondary = io.FileIO(secondary, "r+b") - def get_connect_address(self): + def get_connect_address(self) -> str: libc = ctypes.CDLL(None) libc.ptsname.argtypes = (ctypes.c_int,) libc.ptsname.restype = ctypes.c_char_p @@ -484,7 +492,7 @@ class PtyServerSocket(ServerChannel): self._secondary.close() self._primary.close() - def recv(self): + def recv(self) -> bytes: try: return self._primary.read(4096) except OSError as e: @@ -493,8 +501,8 @@ class PtyServerSocket(ServerChannel): return b"" raise - def sendall(self, data): - return self._primary.write(data) + def sendall(self, data: bytes) -> None: + self._primary.write(data) class MockGDBServer: @@ -527,18 +535,21 @@ class MockGDBServer: self._thread.join() self._thread = None - def get_connect_address(self): + def get_connect_address(self) -> str: + assert self._socket is not None return self._socket.get_connect_address() - def get_connect_url(self): + def get_connect_url(self) -> str: + assert self._socket is not None return self._socket.get_connect_url() def run(self): + assert self._socket is not None # For testing purposes, we only need to worry about one client # connecting just one time. try: self._socket.accept() - except: + except Exception: traceback.print_exc() return self._shouldSendAck = True @@ -553,7 +564,7 @@ class MockGDBServer: self._receive(data) except self.TerminateConnectionException: pass - except Exception as e: + except Exception: print( "An exception happened when receiving the response from the gdb server. Closing the client..." 
                )
@@ -586,7 +597,9 @@
         Once a complete packet is found at the front of self._receivedData,
         its data is removed from self._receivedData.
         """
+        assert self._receivedData is not None
         data = self._receivedData
+        assert self._receivedDataOffset is not None
         i = self._receivedDataOffset
         data_len = len(data)
         if data_len == 0:
@@ -639,10 +652,13 @@
         self._receivedDataOffset = 0
         return packet
 
-    def _sendPacket(self, packet):
-        self._socket.sendall(seven.bitcast_to_bytes(frame_packet(packet)))
+    def _sendPacket(self, packet: str):
+        assert self._socket is not None
+        framed_packet = seven.bitcast_to_bytes(frame_packet(packet))
+        self._socket.sendall(framed_packet)
 
     def _handlePacket(self, packet):
+        assert self._socket is not None
         if packet is self.PACKET_ACK:
             # Ignore ACKs from the client. For the future, we can consider
             # adding validation code to make sure the client only sends ACKs
diff --git a/lldb/source/Host/freebsd/Host.cpp b/lldb/source/Host/freebsd/Host.cpp
index fa7efad..dfdbfea 100644
--- a/lldb/source/Host/freebsd/Host.cpp
+++ b/lldb/source/Host/freebsd/Host.cpp
@@ -18,8 +18,6 @@
 #include <dlfcn.h>
 #include <execinfo.h>
 
-#include "llvm/Object/ELF.h"
-
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/Host.h"
 #include "lldb/Host/HostInfo.h"
@@ -32,6 +30,7 @@
 #include "lldb/Utility/Status.h"
 #include "lldb/Utility/StreamString.h"
 
+#include "llvm/Object/ELF.h"
 #include "llvm/TargetParser/Host.h"
 
 namespace lldb_private {
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
index 4b183a8..5588208 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
@@ -52,7 +52,7 @@ private:
   ValueObject *m_tree = nullptr;
   size_t m_num_elements = 0;
   ValueObject *m_next_element = nullptr;
-  std::vector<std::pair<ValueObject *, uint64_t>> m_elements_cache;
+  std::vector<ValueObject *> m_elements_cache;
 };
 
 class LibCxxUnorderedMapIteratorSyntheticFrontEnd
@@ -192,26 +192,25 @@ lldb::ValueObjectSP lldb_private::formatters::
         return nullptr;
       }
     }
-    m_elements_cache.push_back(
-        {value_sp.get(), hash_sp->GetValueAsUnsigned(0)});
+    m_elements_cache.push_back(value_sp.get());
     m_next_element = node_sp->GetChildMemberWithName("__next_").get();
     if (!m_next_element || m_next_element->GetValueAsUnsigned(0) == 0)
       m_next_element = nullptr;
   }
 
-  std::pair<ValueObject *, uint64_t> val_hash = m_elements_cache[idx];
-  if (!val_hash.first)
+  ValueObject *val_hash = m_elements_cache[idx];
+  if (!val_hash)
     return lldb::ValueObjectSP();
   StreamString stream;
   stream.Printf("[%" PRIu64 "]", (uint64_t)idx);
   DataExtractor data;
   Status error;
-  val_hash.first->GetData(data, error);
+  val_hash->GetData(data, error);
   if (error.Fail())
     return lldb::ValueObjectSP();
   const bool thread_and_frame_only_if_stopped = true;
-  ExecutionContext exe_ctx = val_hash.first->GetExecutionContextRef().Lock(
-      thread_and_frame_only_if_stopped);
+  ExecutionContext exe_ctx =
+      val_hash->GetExecutionContextRef().Lock(thread_and_frame_only_if_stopped);
   return CreateValueObjectFromData(stream.GetString(), data, exe_ctx,
                                    m_element_type);
 }
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
index fc7bfe4..50ea173 100644
--- a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
+++ 
b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py @@ -44,10 +44,6 @@ class TestCortexMExceptionUnwind(TestBase): thread = process.GetThreadAtIndex(0) self.assertTrue(thread.IsValid()) - # We have 4 named stack frames and two unnamed - # frames above that. The topmost two stack frames - # were not interesting for this test, so I didn't - # create symbols for them. self.assertEqual(thread.GetNumFrames(), 3) stackframe_names = [ "exception_catcher", diff --git a/lldb/tools/lldb-mcp/lldb-mcp.cpp b/lldb/tools/lldb-mcp/lldb-mcp.cpp index 68e9872..3fc3622 100644 --- a/lldb/tools/lldb-mcp/lldb-mcp.cpp +++ b/lldb/tools/lldb-mcp/lldb-mcp.cpp @@ -85,6 +85,8 @@ FileSpec driverPath() { llvm::Error launch() { FileSpec lldb_exec = driverPath(); lldb_private::ProcessLaunchInfo info; + info.SetMonitorProcessCallback( + &lldb_private::ProcessLaunchInfo::NoOpMonitorCallback); info.SetExecutableFile(lldb_exec, /*add_exe_file_as_first_arg=*/true); info.GetArguments().AppendArgument("-O"); diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt index 3cbfb0d..e411ed4 100644 --- a/llvm/benchmarks/CMakeLists.txt +++ b/llvm/benchmarks/CMakeLists.txt @@ -11,6 +11,7 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(MustacheBench Mustache.cpp PARTIAL_SOURCES_INTENDED) +add_benchmark(SpecialCaseListBM SpecialCaseListBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED) diff --git a/llvm/benchmarks/SpecialCaseListBM.cpp b/llvm/benchmarks/SpecialCaseListBM.cpp new file mode 100644 index 0000000..00aa3cd --- /dev/null +++ b/llvm/benchmarks/SpecialCaseListBM.cpp @@ -0,0 +1,207 @@ +#include "benchmark/benchmark.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <iterator> +#include <random> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; + +namespace { +constexpr int RNG_SEED = 123456; +constexpr int MAX_LIST_MIN = 10; +constexpr int MAX_LIST_MAX = 1000000; +constexpr int MAX_LIST_MUL = 10; + +std::unique_ptr<SpecialCaseList> makeSpecialCaseList(StringRef List) { + std::string Error; + std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(List); + auto SCL = SpecialCaseList::create(MB.get(), Error); + assert(SCL); + assert(Error == ""); + return SCL; +} + +static const std::string Dictionary[] = { + "orange", "tabby", "tortie", "tuxedo", "void", + "multiple", "spaces", "cute", "fluffy", "kittens", +}; + +std::vector<std::string> genFiles(size_t NumFiles) { + std::vector<std::string> R; + R.reserve(NumFiles); + std::minstd_rand Rng(RNG_SEED); + std::uniform_int_distribution<> DepthDistrib(8, 16); + std::uniform_int_distribution<> WordDistrib(0, std::size(Dictionary) - 1); + + std::string S; + for (size_t I = 0; I < NumFiles; ++I) { + for (size_t D = DepthDistrib(Rng); D; --D) { + S += Dictionary[WordDistrib(Rng)]; + if (D > 1) + S += "/"; + } + R.push_back(std::move(S)); + S.clear(); + } + return R; +} + +std::string genGlobNone(const std::vector<std::string> &Files) { + std::string S; + for (const auto &F : Files) { + S += "src:"; + S += F; + S += "\n"; + } + return S; +} + 
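+// Each generator below damages the same file list with a '*' glob in a
+// different position (middle, start, end, or both ends) so the benchmarks
+// can exercise different matching behavior in SpecialCaseList.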
+std::string genGlobInMid(const std::vector<std::string> &Files) { + std::string S; + std::minstd_rand Rng(RNG_SEED); + for (std::string F : Files) { + std::uniform_int_distribution<> PosDistrib(0, F.size() - 1); + F[PosDistrib(Rng)] = '*'; + S += "src:"; + S += F; + S += "\n"; + } + return S; +} + +std::string genGlobAtStart(const std::vector<std::string> &Files) { + std::string S; + for (std::string F : Files) { + F.front() = '*'; + S += "src:"; + S += F; + S += "\n"; + } + return S; +} + +std::string genGlobAtEnd(const std::vector<std::string> &Files) { + std::string S; + for (std::string F : Files) { + F.back() = '*'; + S += "src:"; + S += F; + S += "\n"; + } + return S; +} + +std::string genGlobAtBothSides(const std::vector<std::string> &Files) { + std::string S; + for (std::string F : Files) { + F.back() = '*'; + F.front() = '*'; + S += "src:"; + S += F; + S += "\n"; + } + return S; +} + +void BM_Make_( + benchmark::State &state, + std::string (*GenerateCaseList)(const std::vector<std::string> &Files)) { + std::vector<std::string> BigFileList = genFiles(state.range(0)); + std::string BigCaseList = GenerateCaseList(BigFileList); + for (auto _ : state) { + auto SCL = makeSpecialCaseList(BigCaseList); + benchmark::DoNotOptimize(SCL); + } +} +void BM_True_( + benchmark::State &state, + std::string (*GenerateCaseList)(const std::vector<std::string> &Files)) { + std::vector<std::string> BigFileList = genFiles(state.range(0)); + std::string BigCaseList = GenerateCaseList(BigFileList); + auto SCL = makeSpecialCaseList(BigCaseList); + std::minstd_rand Rng(RNG_SEED); + std::uniform_int_distribution<> LineDistrib(0, BigFileList.size() - 1); + for (auto _ : state) { + auto &Q = BigFileList[LineDistrib(Rng)]; + bool R = SCL->inSection("", "src", Q); + if (!R) + abort(); + benchmark::DoNotOptimize(R); + } +} + +void BM_False( + benchmark::State &state, + std::string (*GenerateCaseList)(const std::vector<std::string> &Files)) { + std::vector<std::string> BigFileList = genFiles(state.range(0)); + std::string BigCaseList = GenerateCaseList(BigFileList); + auto SCL = makeSpecialCaseList(BigCaseList); + std::minstd_rand Rng(RNG_SEED); + std::uniform_int_distribution<> LineDistrib(0, BigFileList.size() - 1); + for (auto _ : state) { + std::string Q = BigFileList[LineDistrib(Rng)]; + std::uniform_int_distribution<> PosDistrib(0, Q.size() - 1); + Q[PosDistrib(Rng)] = '_'; + bool R = SCL->inSection("", "src", Q); + benchmark::DoNotOptimize(R); + } +} + +} // namespace + +BENCHMARK_CAPTURE(BM_Make_, None_, genGlobNone) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_Make_, Start, genGlobAtStart) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_Make_, End__, genGlobAtEnd) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_Make_, Mid__, genGlobInMid) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_Make_, Both_, genGlobAtBothSides) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); + +BENCHMARK_CAPTURE(BM_True_, None_, genGlobNone) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_True_, Start, genGlobAtStart) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_True_, End__, genGlobAtEnd) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_True_, Mid__, genGlobInMid) + 
->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_True_, Both_, genGlobAtBothSides) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); + +BENCHMARK_CAPTURE(BM_False, None_, genGlobNone) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_False, Start, genGlobAtStart) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_False, End__, genGlobAtEnd) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_False, Mid__, genGlobInMid) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); +BENCHMARK_CAPTURE(BM_False, Both_, genGlobAtBothSides) + ->RangeMultiplier(MAX_LIST_MUL) + ->Range(MAX_LIST_MIN, MAX_LIST_MAX); + +BENCHMARK_MAIN(); diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index a4d110f..402fd05 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -4172,7 +4172,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". "Image", or "Pipe". This may be more restrictive than indicated by "AccQual" to reflect what the - kernel actual does. If not + kernel actually does. If not present then the runtime must assume what is implied by "AccQual" and "IsConst". Values @@ -5436,8 +5436,8 @@ The fields used by CP for code objects before V3 also match those specified in ``COMPUTE_PGM_RSRC1.PRIORITY``. 13:12 2 bits FLOAT_ROUND_MODE_32 Wavefront starts execution with specified rounding - mode for single (32 - bit) floating point + mode for single (32-bit) + floating point precision floating point operations. @@ -5769,7 +5769,7 @@ The fields used by CP for code objects before V3 also match those specified in Wavefront starts execution with memory violation - exceptions exceptions + exceptions enabled which are generated when a memory violation has occurred for this wavefront from @@ -6005,7 +6005,7 @@ The fields used by CP for code objects before V3 also match those specified in FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush ====================================== ===== ==================================== - Denormal flushing is sign respecting. i.e. the behavior expected by + Denormal flushing is sign respecting, i.e., the behavior expected by ``"denormal-fp-math"="preserve-sign"``. The behavior is undefined with ``"denormal-fp-math"="positive-zero"`` @@ -16831,7 +16831,7 @@ For GFX125x: * Some memory operations contain a ``nv`` bit, for "non-volatile", which indicates memory that is not expected to change during a kernel's execution. This information is propagated to the cache lines for that address - (refered to as ``$nv``). + (referred to as ``$nv``). * When ``nv=0`` reads hit dirty ``$nv=1`` data in cache, the hardware will writeback the data to the next level in the hierarchy and then subsequently read @@ -18970,7 +18970,7 @@ On entry to a function: #. All other registers are unspecified. #. Any necessary ``s_waitcnt`` has been performed to ensure memory is available to the function. -#. Use pass-by-reference (byref) in stead of pass-by-value (byval) for struct +#. Use pass-by-reference (byref) instead of pass-by-value (byval) for struct arguments in C ABI. Callee is responsible for allocating stack memory and copying the value of the struct if modified. Note that the backend still supports byval for struct arguments. @@ -20214,7 +20214,7 @@ from the value of the ``-mcpu`` option that is passed to the assembler. 
.amdgpu_hsa_kernel (name)
+++++++++++++++++++++++++
 
-This directives specifies that the symbol with given name is a kernel entry
+This directive specifies that the symbol with given name is a kernel entry
 point (label) and the object should contain corresponding symbol of type
 STT_AMDGPU_HSA_KERNEL.
diff --git a/llvm/docs/GettingStartedVS.rst b/llvm/docs/GettingStartedVS.rst
index bc5746d..e65fd8f 100644
--- a/llvm/docs/GettingStartedVS.rst
+++ b/llvm/docs/GettingStartedVS.rst
@@ -126,6 +126,15 @@ These instructions were tested with Visual Studio 2019 and Python 3.9.6:
      cmake -S llvm\llvm -B build -DLLVM_ENABLE_PROJECTS=clang -DLLVM_TARGETS_TO_BUILD=X86 -Thost=x64
      exit
 
+   .. note::
+      By default, the Visual Studio project files generated by CMake use the
+      32-bit toolset. If you are developing on a 64-bit version of Windows and
+      want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when
+      generating the Visual Studio solution. This requires CMake 3.8.0 or later.
+
+      For Windows on Arm the equivalent is ``-Thost=ARM64``, but this is the
+      default for those hosts, so you do not have to use this option.
+
    ``LLVM_ENABLE_PROJECTS`` specifies any additional LLVM projects you want to
    build while ``LLVM_TARGETS_TO_BUILD`` selects the compiler targets. If
    ``LLVM_TARGETS_TO_BUILD`` is omitted by default all targets are built
@@ -149,10 +158,6 @@ These instructions were tested with Visual Studio 2019 and Python 3.9.6:
    * CMake generates project files for all build types. To select a specific
      build type, use the Configuration manager from the VS IDE or the
      ``/property:Configuration`` command-line option when using MSBuild.
-   * By default, the Visual Studio project files generated by CMake use the
-     32-bit toolset. If you are developing on a 64-bit version of Windows and
-     want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when
-     generating the Visual Studio solution. This requires CMake 3.8.0 or later.
 
 13. Start Visual Studio and select configuration:
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 46a9f32..640516a 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -134,6 +134,8 @@ Changes to the WebAssembly Backend
 Changes to the Windows Target
 -----------------------------
 
+* `-fpseudo-probe-for-profiling` is now supported for COFF.
+
 Changes to the X86 Backend
 --------------------------
 
@@ -161,6 +163,8 @@ Changes to the Debug Info
 Changes to the LLVM tools
 ---------------------------------
 
+* `llvm-profgen` now supports decoding pseudo probes for COFF binaries.
+
 * `llvm-readelf` now dumps all hex format values in lower-case mode.
 * Some code paths for supporting Python 2.7 in `llvm-lit` have been removed.
 * Support for `%T` in lit has been removed.
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index a96535c..1bb07e39 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -54,17 +54,14 @@ public:
   explicit StringSwitch(StringRef S)
   : Str(S), Result() { }
 
+  StringSwitch(StringSwitch &&) = default;
+
   // StringSwitch is not copyable.
   StringSwitch(const StringSwitch &) = delete;
 
   // StringSwitch is not assignable due to 'Str' being 'const'.
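+  // Move assignment would likewise have to rebind the const 'Str' member,
+  // so it stays deleted even though move construction is defaulted above.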
void operator=(const StringSwitch &) = delete; - void operator=(StringSwitch &&other) = delete; - - StringSwitch(StringSwitch &&other) - : Str(other.Str), Result(std::move(other.Result)) { } - - ~StringSwitch() = default; + void operator=(StringSwitch &&) = delete; // Case-sensitive case matchers StringSwitch &Case(StringLiteral S, T Value) { diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 8dcc292..1cfcdbf 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -62,6 +62,7 @@ ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139) ELF_RELOC(R_AARCH64_PLT32, 0x13a) ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b) ELF_RELOC(R_AARCH64_PATCHINST, 0x13c) +ELF_RELOC(R_AARCH64_FUNCINIT64, 0x13d) // General dynamic TLS relocations ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200) ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201) diff --git a/llvm/include/llvm/CAS/CASID.h b/llvm/include/llvm/CAS/CASID.h index 8820994..f508ed3 100644 --- a/llvm/include/llvm/CAS/CASID.h +++ b/llvm/include/llvm/CAS/CASID.h @@ -95,8 +95,7 @@ public: } friend hash_code hash_value(const CASID &ID) { - ArrayRef<uint8_t> Hash = ID.getHash(); - return hash_combine_range(Hash.begin(), Hash.end()); + return hash_combine_range(ID.getHash()); } const CASContext &getContext() const { diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 1050b3d..c252f9d 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -229,8 +229,8 @@ public: /// doing something wrong if you call pruneValue directly on a /// LiveInterval. Indeed, you are supposed to call pruneValue on the main /// LiveRange and all the LiveRanges of the subranges if any. 
- LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex, - SmallVectorImpl<SlotIndex> *) { + [[maybe_unused]] void pruneValue(LiveInterval &, SlotIndex, + SmallVectorImpl<SlotIndex> *) { llvm_unreachable( "Use pruneValue on the main LiveRange and on each subrange"); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index a4a7fa4..a5f6c4f 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -272,6 +272,9 @@ struct ExecutorAddrRange { } bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; } + bool contains(const ExecutorAddrRange &Other) { + return (Other.Start >= Start && Other.End <= End); + } bool overlaps(const ExecutorAddrRange &Other) { return !(Other.End <= Start || End <= Other.Start); } diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td index bf20080..4a0272c 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td @@ -180,4 +180,98 @@ let TargetPrefix = "riscv" in { // XSfvfnrclipxfqf defm int_riscv_sf_vfnrclip_x_f_qf : RISCVSFCustomVFNRCLIP; defm int_riscv_sf_vfnrclip_xu_f_qf : RISCVSFCustomVFNRCLIP; + + // XSfmm + // Output: (output_len) + // Input: (input_len, vsew, twiden) + class RISCVSFVSet + : DefaultAttrsIntrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, IntrNoMem]>; + + // Input: (tss, base, tn) + class RISCVSFTileLoad + : DefaultAttrsIntrinsic<[], + [llvm_anyint_ty, llvm_ptr_ty, LLVMMatchType<0>], + [NoCapture<ArgIndex<1>>, IntrHasSideEffects]>, + RISCVVIntrinsic; + + // Input: (tss, base, tn) + class RISCVSFTileStore + : DefaultAttrsIntrinsic<[], + [llvm_anyint_ty, llvm_ptr_ty, LLVMMatchType<0>], + [NoCapture<ArgIndex<1>>, IntrWriteMem, + IntrHasSideEffects]>, + RISCVVIntrinsic; + + // Output: () + // Input: (mtd, mat1, mat2, tm, tn, tk, twiden) + class RISCVSFCustomMatMul<bit is_float = false> + : DefaultAttrsIntrinsic<[], [llvm_anyint_ty, llvm_anyvector_ty, + !if(is_float, LLVMMatchType<1>, + llvm_anyvector_ty), + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrHasSideEffects, + ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<6>>]>, + RISCVVIntrinsic; + + def int_riscv_sf_vsettnt : RISCVSFVSet; + def int_riscv_sf_vsettm : RISCVSFVSet; + def int_riscv_sf_vsettk : RISCVSFVSet; + + def int_riscv_sf_vlte8 : RISCVSFTileLoad; + def int_riscv_sf_vlte16 : RISCVSFTileLoad; + def int_riscv_sf_vlte32 : RISCVSFTileLoad; + def int_riscv_sf_vlte64 : RISCVSFTileLoad; + def int_riscv_sf_vste8 : RISCVSFTileStore; + def int_riscv_sf_vste16 : RISCVSFTileStore; + def int_riscv_sf_vste32 : RISCVSFTileStore; + def int_riscv_sf_vste64 : RISCVSFTileStore; + + // Output: (vd) + // Input: (tss, tn) + def int_riscv_sf_vtmv_v_t + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyint_ty, LLVMMatchType<1>], + [IntrNoMem, IntrHasSideEffects]>, + RISCVVIntrinsic { + let VLOperand = 2; + } + // Output: () + // Input: (tss, vs2, tn) + def int_riscv_sf_vtmv_t_v + : DefaultAttrsIntrinsic<[], [LLVMMatchType<1>, llvm_anyvector_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, + RISCVVIntrinsic { + let VLOperand = 2; + } + + foreach a = ["u", "s"] in { + foreach b = ["u", "s"] in { + def int_riscv_sf_mm_ # a # _ # b : RISCVSFCustomMatMul; + } + } + + def 
int_riscv_sf_mm_f_f : RISCVSFCustomMatMul<true>;
+ foreach e1 = [5, 4] in
+ foreach e2 = [5, 4] in
+ def int_riscv_sf_mm_e # e1 # m # !sub(7, e1) # _e # e2 # m # !sub(7, e2)
+ : RISCVSFCustomMatMul<true>;
+
+ // Output: ()
+ // Input: (mtd, tm, tn, log2sew, twiden)
+ def int_riscv_sf_vtzero_t
+ : DefaultAttrsIntrinsic<[],
+ [llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<3>>,
+ ImmArg<ArgIndex<4>>, IntrNoMem, IntrHasSideEffects]>,
+ RISCVVIntrinsic;
+
+ // Output: ()
+ // Input: ()
+ def int_riscv_sf_vtdiscard
+ : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>,
+ RISCVVIntrinsic;
} // TargetPrefix = "riscv"
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 5a26e2f..e9a417d 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -833,6 +833,7 @@ struct BBAddrMap { bool MultiBBRange : 1; bool OmitBBEntries : 1; bool CallsiteEndOffsets : 1;
+ bool BBHash : 1;
bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }
@@ -845,7 +846,8 @@ struct BBAddrMap { (static_cast<uint8_t>(BrProb) << 2) | (static_cast<uint8_t>(MultiBBRange) << 3) | (static_cast<uint8_t>(OmitBBEntries) << 4) |
- (static_cast<uint8_t>(CallsiteEndOffsets) << 5);
+ (static_cast<uint8_t>(CallsiteEndOffsets) << 5) |
+ (static_cast<uint8_t>(BBHash) << 6);
}
// Decodes from minimum bit width representation and validates no
@@ -854,7 +856,8 @@ struct BBAddrMap { Features Feat{ static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)), static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),
- static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))};
+ static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5)),
+ static_cast<bool>(Val & (1 << 6))};
if (Feat.encode() != Val) return createStringError( std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x",
@@ -864,10 +867,10 @@ struct BBAddrMap { bool operator==(const Features &Other) const { return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange,
- OmitBBEntries, CallsiteEndOffsets) ==
+ OmitBBEntries, CallsiteEndOffsets, BBHash) ==
std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb, Other.MultiBBRange, Other.OmitBBEntries,
- Other.CallsiteEndOffsets);
+ Other.CallsiteEndOffsets, Other.BBHash);
} };
@@ -920,17 +923,19 @@ struct BBAddrMap { false}; // Metadata for this basic block. // Offsets of end of call instructions, relative to the basic block start. SmallVector<uint32_t, 1> CallsiteEndOffsets;
+ uint64_t Hash = 0; // Hash for this basic block.
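+ // Editor's note: decoders treat this as a raw uint64_t (see
+ // decodeBBAddrMapImpl in llvm/lib/Object/ELF.cpp below); it requires
+ // SHT_LLVM_BB_ADDR_MAP version >= 4 with the BBHash feature bit (bit 6) set.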
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD, - SmallVector<uint32_t, 1> CallsiteEndOffsets) + SmallVector<uint32_t, 1> CallsiteEndOffsets, uint64_t Hash) : ID(ID), Offset(Offset), Size(Size), MD(MD), - CallsiteEndOffsets(std::move(CallsiteEndOffsets)) {} + CallsiteEndOffsets(std::move(CallsiteEndOffsets)), Hash(Hash) {} UniqueBBID getID() const { return {ID, 0}; } bool operator==(const BBEntry &Other) const { return ID == Other.ID && Offset == Other.Offset && Size == Other.Size && - MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets; + MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets && + Hash == Other.Hash; } bool hasReturn() const { return MD.HasReturn; } diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index c90591d..a7c7c7c 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -163,6 +163,7 @@ struct BBAddrMapEntry { llvm::yaml::Hex64 Size; llvm::yaml::Hex64 Metadata; std::optional<std::vector<llvm::yaml::Hex64>> CallsiteEndOffsets; + std::optional<llvm::yaml::Hex64> Hash; }; uint8_t Version; llvm::yaml::Hex8 Feature; diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 466e2a4..ead7655 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -115,7 +115,8 @@ protected: // classes. LLVM_ABI bool createInternal(const std::vector<std::string> &Paths, vfs::FileSystem &VFS, std::string &Error); - LLVM_ABI bool createInternal(const MemoryBuffer *MB, std::string &Error); + LLVM_ABI bool createInternal(const MemoryBuffer *MB, std::string &Error, + bool OrderBySize = false); SpecialCaseList() = default; SpecialCaseList(SpecialCaseList const &) = delete; @@ -126,6 +127,8 @@ private: class RegexMatcher { public: LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); + LLVM_ABI void preprocess(bool BySize); + LLVM_ABI void match(StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const; @@ -144,6 +147,8 @@ private: class GlobMatcher { public: LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); + LLVM_ABI void preprocess(bool BySize); + LLVM_ABI void match(StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const; @@ -164,6 +169,9 @@ private: public: LLVM_ABI Matcher(bool UseGlobs, bool RemoveDotSlash); + LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); + LLVM_ABI void preprocess(bool BySize); + LLVM_ABI void match(StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const; @@ -174,8 +182,6 @@ private: return R; } - LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); - std::variant<RegexMatcher, GlobMatcher> M; bool RemoveDotSlash; }; @@ -206,6 +212,8 @@ protected: StringRef Category) const; private: + friend class SpecialCaseList; + LLVM_ABI void preprocess(bool OrderBySize); LLVM_ABI const SpecialCaseList::Matcher * findMatcher(StringRef Prefix, StringRef Category) const; }; @@ -222,7 +230,7 @@ private: /// Parses just-constructed SpecialCaseList entries from a memory buffer. 
LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB,
- std::string &Error);
+ std::string &Error, bool OrderBySize);
}; } // namespace llvm
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 8d20b0e..805b682 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1180,32 +1180,41 @@ bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { S = SE->getTruncateOrZeroExtend(S, MaxType); Size = SE->getTruncateOrZeroExtend(Size, MaxType);
- // Special check for addrecs using BE taken count
- if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S))
- if (AddRec->isAffine() && AddRec->hasNoSignedWrap()) {
- const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
- const SCEV *Start = AddRec->getStart();
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
- const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
- const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
-
- // If the value of Step is non-negative and the AddRec is non-wrap, it
- // reaches its maximum at the last iteration. So it's enouth to check
- // whether End - Size is negative.
- if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
- return true;
+ auto CheckAddRecBECount = [&]() {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+ if (!AddRec || !AddRec->isAffine() || !AddRec->hasNoSignedWrap())
+ return false;
+ const SCEV *BECount = collectUpperBound(AddRec->getLoop(), MaxType);
+ // If the BTC cannot be computed, check the base case for S.
+ if (!BECount || isa<SCEVCouldNotCompute>(BECount))
+ return false;
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
+ const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
+ const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
+
+ // If the value of Step is non-negative and the AddRec is non-wrap, it
+ // reaches its maximum at the last iteration. So it's enough to check
+ // whether End - Size is negative.
+ if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
+ return true;
- // If the value of Step is non-positive and the AddRec is non-wrap, the
- // initial value is its maximum.
- if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
- return true;
+ // If the value of Step is non-positive and the AddRec is non-wrap, the
+ // initial value is its maximum.
+ if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
+ return true;
- // Even if we don't know the sign of Step, either Start or End must be
- // the maximum value of the AddRec since it is non-wrap.
- if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1))
- return true;
- }
+ // Even if we don't know the sign of Step, either Start or End must be
+ // the maximum value of the AddRec since it is non-wrap.
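+ // (Editor's illustration: a non-wrapping affine AddRec is monotone in the
+ // iteration number, so its maximum is one of its two endpoints; if both
+ // Start - Size and End - Size are negative, every value of the AddRec
+ // stays below Size.)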
+ if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1)) + return true; + + return false; + }; + + if (CheckAddRecBECount()) + return true; // Check using normal isKnownNegative const SCEV *LimitedBound = SE->getMinusSCEV(S, Size); diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index b8c540c..9f8ac6e 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -849,17 +849,12 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, /// %sum.2 = select %cmp, %add, %sum.1 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { - SelectInst *SI = dyn_cast<SelectInst>(I); - if (!SI) - return InstDesc(false, I); - - CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); + Value *TrueVal, *FalseVal; // Only handle single use cases for now. - if (!CI || !CI->hasOneUse()) + if (!match(I, + m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal)))) return InstDesc(false, I); - Value *TrueVal = SI->getTrueValue(); - Value *FalseVal = SI->getFalseValue(); // Handle only when either of operands of select instruction is a PHI // node for now. if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) || @@ -886,7 +881,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { if (!IPhi || IPhi != FalseVal) return InstDesc(false, I); - return InstDesc(true, SI); + return InstDesc(true, I); } RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4e38626..e08ef60 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6644,7 +6644,7 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, "Invalid mask width"); // If index-width (mask size) is less than pointer-size then mask is // 1-extended. - if (match(Op1, m_PtrToInt(m_Specific(Op0)))) + if (match(Op1, m_PtrToIntOrAddr(m_Specific(Op0)))) return Op0; // NOTE: We may have attributes associated with the return value of the diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index ab37338..0b2e3fc 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -393,7 +393,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, /// \param AA The AliasAnalysis we used for our search. /// \param AllowImpreciseClobber Always false, unless we do relaxed verify. 
-LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, const MemoryLocation &StartLoc, const MemorySSA &MSSA, const UpwardsMemoryQuery &Query, BatchAAResults &AA, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 3fab6b0..4bafd3f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15761,6 +15761,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock( const SCEV *OneAlignedUp = GetNextSCEVDividesByDivisor(One, DividesBy); To = SE.getUMaxExpr(FromRewritten, OneAlignedUp); + } else { + if (LHS->getType()->isPointerTy()) { + LHS = SE.getLosslessPtrToIntExpr(LHS); + RHS = SE.getLosslessPtrToIntExpr(RHS); + if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS)) + break; + } + auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) { + const SCEV *Sub = SE.getMinusSCEV(A, B); + AddRewrite(Sub, Sub, + SE.getUMaxExpr(Sub, SE.getOne(From->getType()))); + }; + AddSubRewrite(LHS, RHS); + AddSubRewrite(RHS, LHS); + continue; } break; default: diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 219bbc9..05fffe9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1437,7 +1437,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, static_cast<bool>(BBAddrMapSkipEmitBBEntries), - HasCalls}; + HasCalls, + false}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4931403..53f1cfe2 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -770,7 +770,7 @@ struct PartwordMaskValues { Value *Inv_Mask = nullptr; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { auto PrintObj = [&O](auto *V) { if (V) diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 3812823..04d9309 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) { return getKnownBits(R).One; } -LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" @@ -2013,6 +2013,43 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; break; } + case TargetOpcode::G_ADD: { + Register Src2 = MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + if (Src2NumSignBits <= 2) + return 1; // Early out. + + Register Src1 = MI.getOperand(1).getReg(); + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits == 1) + return 1; // Early Out. 
+ + // Special case decrementing a value (ADD X, -1): + KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth); + if (Known2.isAllOnes()) { + KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known1.Zero | 1).isAllOnes()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known1.isNonNegative()) { + FirstAnswer = Src1NumSignBits; + break; + } + + // Otherwise, we treat this like an ADD. + } + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; + break; + } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { bool IsFP = Opcode == TargetOpcode::G_FCMP; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 3268c26..9662511 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1551,7 +1551,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e153842..787a81a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5044,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); - ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef @@ -5076,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. - if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1)) return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4512c5c..90edaf3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::ADD: case ISD::ADDC: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. + // TODO: Move Operand 1 check before Operand 0 check Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. @@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Tmp2 == 1) return 1; // Early out. + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. 
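+ // (Editor's illustration: if both operands of an i8 add have at least 3
+ // sign bits, each lies in [-32, 31], so the sum lies in [-64, 62] and
+ // still has at least min(3, 3) - 1 = 2 sign bits.)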
return std::min(Tmp, Tmp2) - 1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -6403,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, if (VT.isScalableVector()) return SDValue(); - // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be - // simplified to one big BUILD_VECTOR. + // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and + // single-element INSERT_VECTOR_ELT operands can be simplified to one big + // BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; @@ -6414,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); else if (Op.getOpcode() == ISD::BUILD_VECTOR) Elts.append(Op->op_begin(), Op->op_end()); + else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && + OpVT.getVectorNumElements() == 1 && + isNullConstant(Op.getOperand(2))) + Elts.push_back(Op.getOperand(1)); else return SDValue(); } @@ -8783,7 +8790,7 @@ static SDValue getMemcpyLoadsAndStores( if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); } @@ -8799,7 +8806,7 @@ static SDValue getMemcpyLoadsAndStores( assert(NVT.bitsGE(VT)); bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; @@ -8808,14 +8815,14 @@ static SDValue getMemcpyLoadsAndStores( Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), VT, commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); OutStoreChains.push_back(Store); } @@ -8945,14 +8952,14 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Value; bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; Value = DAG.getLoad( VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); @@ -8967,7 +8974,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; @@ -9099,7 
+9106,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, NewAAInfo); diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 3842b1a..6a9ef2e 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -741,7 +741,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, assert(!CI2->isZero() && "And zero handled above"); if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { // If and'ing the address of a global with a constant, fold it. - if (CE1->getOpcode() == Instruction::PtrToInt && + if ((CE1->getOpcode() == Instruction::PtrToInt || + CE1->getOpcode() == Instruction::PtrToAddr) && isa<GlobalValue>(CE1->getOperand(0))) { GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0)); diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp index eef8a21..6b7b4f1 100644 --- a/llvm/lib/ObjCopy/ConfigManager.cpp +++ b/llvm/lib/ObjCopy/ConfigManager.cpp @@ -122,14 +122,14 @@ ConfigManager::getDXContainerConfig() const { if (!Common.AddGnuDebugLink.empty() || !Common.SplitDWO.empty() || !Common.AllocSectionsPrefix.empty() || Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() || - !Common.DumpSection.empty() || !Common.KeepSection.empty() || - !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || - Common.ExtractDWO || Common.OnlyKeepDebug || Common.StripAllGNU || - Common.StripDWO || Common.StripDebug || Common.StripNonAlloc || - Common.StripSections || Common.StripUnneeded || - Common.DecompressDebugSections || Common.GapFill != 0 || - Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0 || + !Common.KeepSection.empty() || !Common.SectionsToRename.empty() || + !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.OnlyKeepDebug || Common.StripAllGNU || Common.StripDWO || + Common.StripDebug || Common.StripNonAlloc || Common.StripSections || + Common.StripUnneeded || Common.DecompressDebugSections || + Common.GapFill != 0 || Common.PadTo != 0 || + Common.ChangeSectionLMAValAll != 0 || !Common.ChangeSectionAddress.empty()) { return createStringError(llvm::errc::invalid_argument, "option is not supported for DXContainer"); diff --git a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp index d7f3c0d..95ab3d9 100644 --- a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp +++ b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp @@ -9,8 +9,10 @@ #include "llvm/ObjCopy/DXContainer/DXContainerObjcopy.h" #include "DXContainerReader.h" #include "DXContainerWriter.h" +#include "llvm/BinaryFormat/DXContainer.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/DXContainer/DXContainerConfig.h" +#include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -42,7 +44,47 @@ static Error extractPartAsObject(StringRef PartName, StringRef OutFilename, "part '%s' not found", PartName.str().c_str()); } +static Error dumpPartToFile(StringRef PartName, 
StringRef Filename,
+ StringRef InputFilename, Object &Obj) {
+ auto PartIter = llvm::find_if(
+ Obj.Parts, [&PartName](const Part &P) { return P.Name == PartName; });
+ if (PartIter == Obj.Parts.end())
+ return createFileError(Filename,
+ std::make_error_code(std::errc::invalid_argument),
+ "part '%s' not found", PartName.str().c_str());
+ ArrayRef<uint8_t> Contents = PartIter->Data;
+ // The DXContainer format is a bit odd because the part-specific headers are
+ // contained inside the part data itself. For parts that contain LLVM bitcode,
+ // when we dump the part we want to skip the part-specific header so that we
+ // get a valid .bc file that we can inspect. All the data contained inside the
+ // program header is pulled out of the bitcode, so the header can be
+ // reconstructed if needed from the bitcode itself. More comprehensive
+ // documentation on the DXContainer format can be found at
+ // https://llvm.org/docs/DirectX/DXContainer.html.
+
+ if (PartName == "DXIL" || PartName == "STAT")
+ Contents = Contents.drop_front(sizeof(llvm::dxbc::ProgramHeader));
+ if (Contents.empty())
+ return createFileError(Filename, object_error::parse_failed,
+ "part '%s' is empty", PartName.str().c_str());
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(Filename, Contents.size());
+ if (!BufferOrErr)
+ return createFileError(Filename, BufferOrErr.takeError());
+ std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+ llvm::copy(Contents, Buf->getBufferStart());
+ if (Error E = Buf->commit())
+ return createFileError(Filename, std::move(E));
+ return Error::success();
+}
+
static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+ for (StringRef Flag : Config.DumpSection) {
+ auto [SecName, FileName] = Flag.split("=");
+ if (Error E = dumpPartToFile(SecName, FileName, Config.InputFilename, Obj))
+ return E;
+ }
+
// Extract all sections before any modifications. for (StringRef Flag : Config.ExtractSection) { StringRef SectionName;
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 53699ce0..f256e7b 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -837,7 +837,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, Version = Data.getU8(Cur); if (!Cur) break;
- if (Version < 2 || Version > 3)
+ if (Version < 2 || Version > 4)
return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast<int>(Version))); Feature = Data.getU8(Cur); // Feature byte
@@ -852,6 +852,11 @@ "callsite offsets feature is enabled: version = " + Twine(static_cast<int>(Version)) + " feature = " + Twine(static_cast<int>(Feature)));
+ if (FeatEnable.BBHash && Version < 4)
+ return createError("version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when "
+ "basic block hash feature is enabled: version = " +
+ Twine(static_cast<int>(Version)) +
+ " feature = " + Twine(static_cast<int>(Feature)));
uint32_t NumBlocksInBBRange = 0; uint32_t NumBBRanges = 1; typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0;
@@ -907,6 +912,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) + LastCallsiteEndOffset; uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint64_t Hash = FeatEnable.BBHash ?
Data.getU64(Cur) : 0; Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr = BBAddrMap::BBEntry::Metadata::decode(MD); if (!MetadataOrErr) { @@ -914,7 +920,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, break; } BBEntries.push_back({ID, Offset + PrevBBEndOffset, Size, - *MetadataOrErr, CallsiteEndOffsets}); + *MetadataOrErr, CallsiteEndOffsets, Hash}); PrevBBEndOffset += Offset + Size; } TotalNumBlocks += BBEntries.size(); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index faeeab3..8b75fbe 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,7 +1465,7 @@ void ELFState<ELFT>::writeSectionContent( for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 3) + if (E.Version > 4) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast<int>(E.Version) << "; encoding using the most recent version"; @@ -1526,6 +1526,12 @@ void ELFState<ELFT>::writeSectionContent( } SHeader.sh_size += CBA.writeULEB128(BBE.Size); SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); + if (FeatureOrErr->BBHash || BBE.Hash.has_value()) { + uint64_t Hash = + BBE.Hash.has_value() ? BBE.Hash.value() : llvm::yaml::Hex64(0); + CBA.write<uint64_t>(Hash, ELFT::Endianness); + SHeader.sh_size += 8; + } } } if (!PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index d9cce1e..421d6603 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1887,6 +1887,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping( IO.mapRequired("Size", E.Size); IO.mapRequired("Metadata", E.Metadata); IO.mapOptional("CallsiteEndOffsets", E.CallsiteEndOffsets); + IO.mapOptional("Hash", E.Hash); } void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping( diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 80fd485..549c418 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -55,12 +55,20 @@ Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern, return Error::success(); } +void SpecialCaseList::RegexMatcher::preprocess(bool BySize) { + if (BySize) { + llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) { + return A.Name.size() < B.Name.size(); + }); + } +} + void SpecialCaseList::RegexMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { for (const auto &R : reverse(RegExes)) if (R.Rg.match(Query)) - Cb(R.Name, R.LineNo); + return Cb(R.Name, R.LineNo); } Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, @@ -75,12 +83,20 @@ Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, return Error::success(); } +void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { + if (BySize) { + llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) { + return A.Name.size() < B.Name.size(); + }); + } +} + void SpecialCaseList::GlobMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { for (const auto &G : reverse(Globs)) if (G.Pattern.match(Query)) - Cb(G.Name, G.LineNo); + return Cb(G.Name, G.LineNo); } SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) @@ -91,6 +107,14 @@ SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) M.emplace<RegexMatcher>(); } +Error SpecialCaseList::Matcher::insert(StringRef 
Pattern, unsigned LineNumber) { + return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); +} + +LLVM_ABI void SpecialCaseList::Matcher::preprocess(bool BySize) { + return std::visit([&](auto &V) { return V.preprocess(BySize); }, M); +} + void SpecialCaseList::Matcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { @@ -99,10 +123,6 @@ void SpecialCaseList::Matcher::match( return std::visit([&](auto &V) { return V.match(Query, Cb); }, M); } -Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) { - return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); -} - // TODO: Refactor this to return Expected<...> std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const std::vector<std::string> &Paths, @@ -141,7 +161,7 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, return false; } std::string ParseError; - if (!parse(i, FileOrErr.get().get(), ParseError)) { + if (!parse(i, FileOrErr.get().get(), ParseError, /*OrderBySize=*/false)) { Error = (Twine("error parsing file '") + Path + "': " + ParseError).str(); return false; } @@ -149,9 +169,9 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, return true; } -bool SpecialCaseList::createInternal(const MemoryBuffer *MB, - std::string &Error) { - if (!parse(0, MB, Error)) +bool SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error, + bool OrderBySize) { + if (!parse(0, MB, Error, OrderBySize)) return false; return true; } @@ -174,7 +194,7 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, } bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, - std::string &Error) { + std::string &Error, bool OrderBySize) { unsigned long long Version = 2; StringRef Header = MB->getBuffer(); @@ -246,6 +266,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, return false; } } + + for (Section &S : Sections) + S.preprocess(OrderBySize); + return true; } @@ -283,6 +307,13 @@ SpecialCaseList::Section::findMatcher(StringRef Prefix, return &II->second; } +LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) { + SectionMatcher.preprocess(false); + for (auto &[K1, E] : Entries) + for (auto &[K2, M] : E) + M.preprocess(OrderBySize); +} + unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const { diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index 804ff07..41f5187 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -54,9 +54,9 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) { return std::nullopt; } -LLVM_ATTRIBUTE_UNUSED static void -HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, - SmallVectorImpl<char> &Result) { +[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output, + size_t &OutputLength, + SmallVectorImpl<char> &Result) { // No space left in output buffer. Double the size of the underlying // memory in the SmallVectorImpl, adjust pointer and length and continue // the conversion. 
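Editor's aside: the comment above describes a grow-and-continue buffer pattern. A minimal C++ sketch of that pattern follows; it is not part of the patch, and the variable roles (Capacity = total buffer size, OutputLength = space remaining, Output = write cursor into Result) are assumptions read off the signature:

static void handleOverflowSketch(size_t &Capacity, char *&Output,
                                 size_t &OutputLength,
                                 SmallVectorImpl<char> &Result) {
  size_t BytesWritten = Capacity - OutputLength; // output produced so far
  Capacity *= 2;                                 // double the backing store
  Result.resize_for_overwrite(Capacity);         // may reallocate Result
  Output = Result.data() + BytesWritten;         // re-anchor the write cursor
  OutputLength = Capacity - BytesWritten;        // space left to convert into
}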
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp index 8d66348..6f8e091 100644 --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -476,7 +476,7 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) { std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) + 1; - LLVM_ATTRIBUTE_UNUSED static std::size_t Rows = + [[maybe_unused]] static std::size_t Rows = UnicodeNameToCodepointLargestNameSize + 1; std::vector<char> Distances( diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2c3870c..636d4f8a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) { Spec = AArch64::S_GOTPCREL; else if (Identifier == "plt") Spec = AArch64::S_PLT; + else if (Identifier == "funcinit") + Spec = AArch64::S_FUNCINIT; } if (Spec == AArch64::S_None) return Error(Loc, "invalid relocation specifier"); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a388216..892b8da 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup, } if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR) return ELF::R_AARCH64_AUTH_ABS64; + if (RefKind == AArch64::S_FUNCINIT) + return ELF::R_AARCH64_FUNCINIT64; return ELF::R_AARCH64_ABS64; } case AArch64::fixup_aarch64_add_imm12: diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 2b5cf34..bc090c6 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = { {AArch64::S_GOT, "GOT"}, {AArch64::S_GOTPCREL, "GOTPCREL"}, {AArch64::S_PLT, "PLT"}, + {AArch64::S_FUNCINIT, "FUNCINIT"}, }; const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 0dfa61b..f2acff5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -164,6 +164,7 @@ enum { // ELF relocation specifiers in data directives: S_PLT = 0x400, S_GOTPCREL, + S_FUNCINIT, // Mach-O @ relocation specifiers: S_MACHO_GOT, diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index dd6fa16..d71f728 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) if (auto *CalledFunction = CB.getCalledFunction()) CalledFn = SMEAttrs(*CalledFunction, TLI); + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. 
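+ // (Editor's illustration: given `invoke void @agnostic_za_fn() to label
+ // %cont unwind label %lpad`, execution may resume at %lpad instead of
+ // %cont, so the caller conservatively treats the callee as private-ZA.)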
+ if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. assert((IsIndirect || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 80e985d..a2841c11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { return CacheLineAlign; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static bool isCopyFromRegOfInlineAsm(const SDNode *N) { assert(N->getOpcode() == ISD::CopyFromReg); do { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a44a247..d516330 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC_LO) { - if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC_LO) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC) { // FIXME: Hack until VReg_1 removed. 
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp index 39e651d..8945ec3 100644 --- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp +++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp @@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, } // TODO -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 974f653..5f180d6 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable); } -LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. - static DecodeStatus - DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const MCDisassembler *Decoder) { +[[maybe_unused]] // Suppress warning temporarily. +static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg HvxVQRDecoderTable[] = { Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3, Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7}; diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index 5dde47a..a3296e0 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -419,8 +419,8 @@ namespace { using HCE = HexagonConstExtenders; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) @@ -435,8 +435,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &P) { if (P.Rs.Reg != 0) OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); else @@ -451,8 +451,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) { OS << "## " << (P.Ex.Neg ? 
"- " : "+ "); if (P.Ex.Rs.Reg != 0) OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); @@ -469,15 +469,15 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) { OS << '[' << P.ExtI.first << ", " << PrintExpr(P.ExtI.second, P.HRI) << ']'; return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) { assert(ED.OpNum != -1u); const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); const MachineFunction &MF = *MBB.getParent(); @@ -493,8 +493,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) { switch (ER.Kind) { case MachineOperand::MO_Immediate: OS << "imm:" << ER.V.ImmVal; @@ -527,8 +527,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) { OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; return OS; } @@ -540,8 +540,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) { OS << "{\n"; for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) { OS << " " << PrintInit(Q.first, P.HRI) << " -> {"; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 4d96cfa..c7a4f68 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -789,7 +789,7 @@ struct ShuffleMask { } }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) { SM.print(OS); return OS; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 87d052b..e4c0a16 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -364,7 +364,7 @@ private: const HexagonVectorCombine &HVC; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; @@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? 
"yes" : "no") << '\n'; @@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; @@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index fa8ae60..2ff5843 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -111,7 +111,7 @@ namespace { friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { const ChainOfDependences &CD = D.Chain; int ChainSize = CD.size(); @@ -144,7 +144,7 @@ namespace { bool isDefined() { return Inst2Replace != nullptr; } }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { OS << "** ReuseValue ***\n"; OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index ca98269..e3094b4 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -275,7 +275,7 @@ namespace HexagonII { INST_ICLASS_ALU32_3 = 0xf0000000 }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] static unsigned getMemAccessSizeInBytes(MemAccessSize S) { switch (S) { case ByteAccess: return 1; diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 66ca436..de433e4 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -1100,6 +1100,12 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, --NumOps; if (RISCVII::hasRoundModeOp(TSFlags)) --NumOps; + if (RISCVII::hasTWidenOp(TSFlags)) + --NumOps; + if (RISCVII::hasTMOp(TSFlags)) + --NumOps; + if (RISCVII::hasTKOp(TSFlags)) + --NumOps; bool hasVLOutput = RISCVInstrInfo::isFaultOnlyFirstLoad(*MI); for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 437022f..9a6afa1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -516,6 +516,44 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp)); } +void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) { + if (!Subtarget->hasVendorXSfmmbase()) + return; + + assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode"); + + SDLoc DL(Node); + MVT XLenVT = Subtarget->getXLenVT(); + + unsigned IntNo = Node->getConstantOperandVal(0); + + assert((IntNo == Intrinsic::riscv_sf_vsettnt || + IntNo == Intrinsic::riscv_sf_vsettm || + IntNo == Intrinsic::riscv_sf_vsettk) && + "Unexpected XSfmm vset intrinsic"); + + unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2)); + 
unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3)); + unsigned PseudoOpCode = + IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT + : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM + : RISCV::PseudoSF_VSETTK; + + if (IntNo == Intrinsic::riscv_sf_vsettnt) { + unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0); + SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); + + ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT, + Node->getOperand(1), VTypeIOp)); + } else { + SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT); + SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT); + ReplaceNode(Node, + CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT, + Node->getOperand(1), Log2SEW, TWiden)); + } +} + bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { MVT VT = Node->getSimpleValueType(0); unsigned Opcode = Node->getOpcode(); @@ -847,6 +885,11 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { return true; } +static Register getTileReg(uint64_t TileNum) { + assert(TileNum <= 15 && "Invalid tile number"); + return RISCV::T0 + TileNum; +} + void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { if (!Subtarget->hasVInstructions()) return; @@ -2035,6 +2078,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { case Intrinsic::riscv_vsetvli: case Intrinsic::riscv_vsetvlimax: return selectVSETVLI(Node); + case Intrinsic::riscv_sf_vsettnt: + case Intrinsic::riscv_sf_vsettm: + case Intrinsic::riscv_sf_vsettk: + return selectXSfmmVSET(Node); } break; } @@ -2458,6 +2505,142 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { case Intrinsic::riscv_sf_vc_i_se: selectSF_VC_X_SE(Node); return; + case Intrinsic::riscv_sf_vlte8: + case Intrinsic::riscv_sf_vlte16: + case Intrinsic::riscv_sf_vlte32: + case Intrinsic::riscv_sf_vlte64: { + unsigned Log2SEW; + unsigned PseudoInst; + switch (IntNo) { + case Intrinsic::riscv_sf_vlte8: + PseudoInst = RISCV::PseudoSF_VLTE8; + Log2SEW = 3; + break; + case Intrinsic::riscv_sf_vlte16: + PseudoInst = RISCV::PseudoSF_VLTE16; + Log2SEW = 4; + break; + case Intrinsic::riscv_sf_vlte32: + PseudoInst = RISCV::PseudoSF_VLTE32; + Log2SEW = 5; + break; + case Intrinsic::riscv_sf_vlte64: + PseudoInst = RISCV::PseudoSF_VLTE64; + Log2SEW = 6; + break; + } + + SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); + SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT); + SDValue Operands[] = {Node->getOperand(2), + Node->getOperand(3), + Node->getOperand(4), + SEWOp, + TWidenOp, + Node->getOperand(0)}; + + MachineSDNode *TileLoad = + CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); + if (auto *MemOp = dyn_cast<MemSDNode>(Node)) + CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()}); + + ReplaceNode(Node, TileLoad); + return; + } + case Intrinsic::riscv_sf_mm_s_s: + case Intrinsic::riscv_sf_mm_s_u: + case Intrinsic::riscv_sf_mm_u_s: + case Intrinsic::riscv_sf_mm_u_u: + case Intrinsic::riscv_sf_mm_e5m2_e5m2: + case Intrinsic::riscv_sf_mm_e5m2_e4m3: + case Intrinsic::riscv_sf_mm_e4m3_e5m2: + case Intrinsic::riscv_sf_mm_e4m3_e4m3: + case Intrinsic::riscv_sf_mm_f_f: { + bool HasFRM = false; + unsigned PseudoInst; + switch (IntNo) { + case Intrinsic::riscv_sf_mm_s_s: + PseudoInst = RISCV::PseudoSF_MM_S_S; + break; + case Intrinsic::riscv_sf_mm_s_u: + PseudoInst = RISCV::PseudoSF_MM_S_U; + break; + case Intrinsic::riscv_sf_mm_u_s: + PseudoInst = RISCV::PseudoSF_MM_U_S; + break; + case Intrinsic::riscv_sf_mm_u_u: + PseudoInst = 
RISCV::PseudoSF_MM_U_U; + break; + case Intrinsic::riscv_sf_mm_e5m2_e5m2: + PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e5m2_e4m3: + PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e4m3_e5m2: + PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e4m3_e4m3: + PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_f_f: + if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16) + PseudoInst = RISCV::PseudoSF_MM_F_F_ALT; + else + PseudoInst = RISCV::PseudoSF_MM_F_F; + HasFRM = true; + break; + } + uint64_t TileNum = Node->getConstantOperandVal(2); + SDValue Op1 = Node->getOperand(3); + SDValue Op2 = Node->getOperand(4); + MVT VT = Op1->getSimpleValueType(0); + unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); + SDValue TmOp = Node->getOperand(5); + SDValue TnOp = Node->getOperand(6); + SDValue TkOp = Node->getOperand(7); + SDValue TWidenOp = Node->getOperand(8); + SDValue Chain = Node->getOperand(0); + + // sf.mm.f.f with sew=32, twiden=2 is invalid + if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 && + TWidenOp->getAsZExtVal() == 2) + reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)"); + + SmallVector<SDValue, 10> Operands( + {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2}); + if (HasFRM) + Operands.push_back( + CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT)); + Operands.append({TmOp, TnOp, TkOp, + CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp, + Chain}); + + auto *NewNode = + CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); + + ReplaceNode(Node, NewNode); + return; + } + case Intrinsic::riscv_sf_vtzero_t: { + uint64_t TileNum = Node->getConstantOperandVal(2); + SDValue Tm = Node->getOperand(3); + SDValue Tn = Node->getOperand(4); + SDValue Log2SEW = Node->getOperand(5); + SDValue TWiden = Node->getOperand(6); + SDValue Chain = Node->getOperand(0); + auto *NewNode = CurDAG->getMachineNode( + RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(), + {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW, + TWiden, Chain}); + + ReplaceNode(Node, NewNode); + return; + } } break; } @@ -3353,14 +3536,20 @@ bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 0); return true; } - // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the - // LHS is equal to the RHS and non-zero otherwise. + // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0 + // if the LHS is equal to the RHS and non-zero otherwise. 
if (isInt<12>(CVal) || CVal == 2048) { - Val = SDValue( - CurDAG->getMachineNode( - RISCV::ADDI, DL, N->getValueType(0), LHS, - CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))), - 0); + unsigned Opc = RISCV::ADDI; + if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) { + Opc = RISCV::ADDIW; + LHS = LHS.getOperand(0); + } + + Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS, + CurDAG->getSignedTargetConstant( + -CVal, DL, N->getValueType(0))), + 0); return true; } if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index f03b44c..19ee103 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -165,6 +165,7 @@ public: void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered); void selectVSETVLI(SDNode *Node); + void selectXSfmmVSET(SDNode *Node); void selectSF_VC_X_SE(SDNode *Node); diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 5b06303..fee1d15 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -177,7 +177,7 @@ def EltDepsVL : EltDeps<vl=1, mask=0>; def EltDepsMask : EltDeps<vl=0, mask=1>; def EltDepsVLMask : EltDeps<vl=1, mask=1>; -class EEW <bits<2> val> { +class EEW<bits<2> val> { bits<2> Value = val; } def EEW1 : EEW<0>; @@ -185,6 +185,13 @@ def EEWSEWx1 : EEW<1>; def EEWSEWx2 : EEW<2>; def EEWSEWx4 : EEW<3>; +class AltFmtType<bits<2> val> { + bits<2> Value = val; +} +def DONT_CARE_ALTFMT : AltFmtType<0>; +def IS_NOT_ALTFMT : AltFmtType<1>; +def IS_ALTFMT : AltFmtType<2>; + class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : Instruction { let Namespace = "RISCV"; @@ -271,8 +278,8 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, // 0 -> Don't care about altfmt bit in VTYPE. // 1 -> Is not altfmt. // 2 -> Is altfmt(BF16). - bits<2> AltFmtType = 0; - let TSFlags{28-27} = AltFmtType; + AltFmtType AltFmtType = DONT_CARE_ALTFMT; + let TSFlags{28-27} = AltFmtType.Value; // XSfmmbase bit HasTWidenOp = 0; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index c1b23af..65865ce 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -128,9 +128,6 @@ defvar TAIL_AGNOSTIC = 1; defvar TU_MU = 0; defvar TA_MU = 1; defvar TA_MA = 3; -defvar DONT_CARE_ALTFMT = 0; -defvar IS_NOT_ALTFMT = 1; -defvar IS_ALTFMT = 2; //===----------------------------------------------------------------------===// // Utilities. 
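
The AltFmtType change above replaces the three raw defvar constants with a TableGen class whose Value field is written into TSFlags bits 28-27. On the C++ side, a two-bit TSFlags field like this is normally read back with a shift-and-mask helper. The sketch below is illustrative only; the enum and helper names are invented for the example and are not the in-tree RISCVII definitions:

#include <cstdint>

// Mirrors the TableGen defs DONT_CARE_ALTFMT / IS_NOT_ALTFMT / IS_ALTFMT.
enum class AltFmtType : uint64_t { DontCare = 0, IsNotAltFmt = 1, IsAltFmt = 2 };

// TSFlags{28-27} holds AltFmtType.Value: shift by 27, then mask two bits.
constexpr unsigned AltFmtTypeShift = 27;
constexpr uint64_t AltFmtTypeMask = uint64_t(0x3) << AltFmtTypeShift;

inline AltFmtType getAltFmtType(uint64_t TSFlags) {
  return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift);
}

Hiding the encoding behind a named class means an out-of-range TableGen assignment (for example, 3) is now a type error rather than a silently accepted bit pattern.
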
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index 5ad22e6b..d77a44a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -418,3 +418,54 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { ixlenimm:$twiden)>; def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>; } + +class VPatXSfmmTileStore<string intrinsic_name, + string inst_name, + int log2sew> : + Pat<(!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs2), + (XLenVT GPR:$rs1), + (XLenVT AVL:$tn)), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs2), + (XLenVT GPR:$rs1), + GPR:$tn, log2sew, 1)>; + +class VPatXSfmmTileMove_T_V<string intrinsic_name, + string inst_name, + ValueType reg_type, + int log2sew> : + Pat<(!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs1), + (reg_type VRM8:$vs2), + (XLenVT AVL:$atn)), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs1), + (reg_type VRM8:$vs2), + GPR:$atn, log2sew, 1)>; + +class VPatXSfmmTileMove_V_T<string intrinsic_name, + string inst_name, + ValueType result_type, + int log2sew> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs1), + (XLenVT AVL:$atn))), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs1), + GPR:$atn, log2sew, 1)>; + +class VPatXSfmmVTDiscard<string intrinsic_name, + string inst_name> : + Pat<(!cast<Intrinsic>(intrinsic_name)), + (!cast<Instruction>(inst_name))>; + +foreach eew = [8, 16, 32, 64] in + def : VPatXSfmmTileStore<"int_riscv_sf_vste" # eew, "PseudoSF_VSTE" # eew, !logtwo(eew)>; + +foreach vti = [VI8M8, VI16M8, VI32M8, VI64M8, VF16M8, VF32M8, VF64M8, VBF16M8] in { + def : VPatXSfmmTileMove_T_V<"int_riscv_sf_vtmv_t_v", "PseudoSF_VTMV_T_V", vti.Vector, vti.Log2SEW>; + def : VPatXSfmmTileMove_V_T<"int_riscv_sf_vtmv_v_t", "PseudoSF_VTMV_V_T", vti.Vector, vti.Log2SEW>; +} + +def : VPatXSfmmVTDiscard<"int_riscv_sf_vtdiscard", "PseudoSF_VTDISCARD">; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 96ad5c6..0a8838c 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() { return new RISCVVLOptimizer(); } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { OI.print(OS); return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const std::optional<OperandInfo> &OI) { if (OI) diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index b765fec..56a6168 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -139,8 +139,8 @@ void SPIRVAsmPrinter::emitEndOfAsmFile(Module &M) { // anymore. void SPIRVAsmPrinter::cleanUp(Module &M) { // Verifier disallows uses of intrinsic global variables. 
- for (StringRef GVName : {"llvm.global_ctors", "llvm.global_dtors", - "llvm.used", "llvm.compiler.used"}) { + for (StringRef GVName : + {"llvm.global_ctors", "llvm.global_dtors", "llvm.used"}) { if (GlobalVariable *GV = M.getNamedGlobal(GVName)) GV->setName(""); } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index e16c8f0..c6c6182 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -17,6 +17,7 @@ #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" @@ -2028,9 +2029,13 @@ Instruction *SPIRVEmitIntrinsics::visitUnreachableInst(UnreachableInst &I) { void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV, IRBuilder<> &B) { - // Skip special artifical variable llvm.global.annotations. - if (GV.getName() == "llvm.global.annotations") + // Skip special artificial variables. + static const StringSet<> ArtificialGlobals{"llvm.global.annotations", + "llvm.compiler.used"}; + + if (ArtificialGlobals.contains(GV.getName())) return; + Constant *Init = nullptr; if (hasInitializer(&GV)) { // Deduce element type and store results in Global Registry. diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index a0cf881..5a06ea3 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -24,6 +24,7 @@ class SystemZTargetMachine; namespace SystemZ { // Condition-code mask values. +const unsigned CCMASK_NONE = 0; const unsigned CCMASK_0 = 1 << 3; const unsigned CCMASK_1 = 1 << 2; const unsigned CCMASK_2 = 1 << 1; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a..de28faf 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -15,6 +15,7 @@ #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +25,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { default: break; } + } else if (Constraint.size() == 5 && Constraint.starts_with("{")) { + if (StringRef("{@cc}").compare(Constraint) == 0) + return C_Other; } return TargetLowering::getConstraintType(Constraint); } @@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } + if (Constraint[1] == '@') { + if (StringRef("{@cc}").compare(Constraint) == 0) + return std::make_pair(0u, &SystemZ::GR32BitRegClass); + } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } @@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister( return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; } +// Convert condition code in CCReg to an i32 value. 
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +// Lower @cc targets via setcc. +SDValue SystemZTargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Glue, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { + if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0) + return SDValue(); + + // Check that return type is valid. + if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || + OpInfo.ConstraintVT.getSizeInBits() < 8) + report_fatal_error("Glue output operand is of invalid type"); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.addLiveIn(SystemZ::CC); + + if (Glue.getNode()) { + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue); + Chain = Glue.getValue(1); + } else + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32); + return getCCResult(DAG, Glue); +} + void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { @@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Convert condition code in CCReg to an i32 value. -static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { - SDLoc DL(CCReg); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); - return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); -} - SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC( return SDValue(); } -static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { +static std::pair<SDValue, int> findCCUse(const SDValue &Val) { + switch (Val.getOpcode()) { + default: + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + case SystemZISD::IPM: + if (Val.getOperand(0).getOpcode() == SystemZISD::CLC || + Val.getOperand(0).getOpcode() == SystemZISD::STRCMP) + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP); + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY); + case SystemZISD::SELECT_CCMASK: { + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg.getOpcode() == SystemZISD::ICMP || + Op4CCReg.getOpcode() == SystemZISD::TM) { + auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0)); + if (OpCC != SDValue()) + return std::make_pair(OpCC, OpCCValid); + } + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + if (!CCValid) + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + int CCValidVal = CCValid->getZExtValue(); + return std::make_pair(Op4CCReg, CCValidVal); + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0)); + if (Op0CC != SDValue()) + return std::make_pair(Op0CC, Op0CCValid); + return findCCUse(Val.getOperand(1)); + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG); + +SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC, + SelectionDAG &DAG) { + SDLoc DL(Val); + auto Opcode = Val.getOpcode(); + switch (Opcode) { + default: + return {}; + case ISD::Constant: + return {Val, 
Val, Val, Val};
+  case SystemZISD::IPM: {
+    SDValue IPMOp0 = Val.getOperand(0);
+    if (IPMOp0 != CC)
+      return {};
+    SmallVector<SDValue, 4> ShiftedCCVals;
+    for (auto CC : {0, 1, 2, 3})
+      ShiftedCCVals.emplace_back(
+          DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
+    return ShiftedCCVals;
+  }
+  case SystemZISD::SELECT_CCMASK: {
+    SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+    auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
+    auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+    if (!CCValid || !CCMask)
+      return {};
+
+    int CCValidVal = CCValid->getZExtValue();
+    int CCMaskVal = CCMask->getZExtValue();
+    const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
+    const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
+    if (TrueSDVals.empty() || FalseSDVals.empty())
+      return {};
+    SDValue Op4CCReg = Val.getOperand(4);
+    if (Op4CCReg != CC)
+      combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
+    if (Op4CCReg != CC)
+      return {};
+    SmallVector<SDValue, 4> MergedSDVals;
+    for (auto &CCVal : {0, 1, 2, 3})
+      MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
+                                    ? TrueSDVals[CCVal]
+                                    : FalseSDVals[CCVal]);
+    return MergedSDVals;
+  }
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SRA:
+    // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
+    // would clobber CC).
+    if (!Val.hasOneUse())
+      return {};
+    [[fallthrough]];
+  case ISD::SHL:
+  case ISD::SRL:
+    SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
+    const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
+    const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
+    if (Op0SDVals.empty() || Op1SDVals.empty())
+      return {};
+    SmallVector<SDValue, 4> BinaryOpSDVals;
+    for (auto CCVal : {0, 1, 2, 3})
+      BinaryOpSDVals.emplace_back(DAG.getNode(
+          Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
+    return BinaryOpSDVals;
+  }
+}
+
+static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
+                          SelectionDAG &DAG) {
   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
   // set by the CCReg instruction using the CCValid / CCMask masks.
-  // If the CCReg instruction is itself a ICMP testing the condition
+  // If the CCReg instruction is itself an ICMP / TM testing the condition
   // code set by some other instruction, see whether we can directly
   // use that condition code.
-
-  // Verify that we have an ICMP against some constant.
-  if (CCValid != SystemZ::CCMASK_ICMP)
-    return false;
-  auto *ICmp = CCReg.getNode();
-  if (ICmp->getOpcode() != SystemZISD::ICMP)
-    return false;
-  auto *CompareLHS = ICmp->getOperand(0).getNode();
-  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
-  if (!CompareRHS)
+  auto *CCNode = CCReg.getNode();
+  if (!CCNode)
     return false;
 
-  // Optimize the case where CompareLHS is a SELECT_CCMASK.
-  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
-    // Verify that we have an appropriate mask for a EQ or NE comparison.
-    bool Invert = false;
-    if (CCMask == SystemZ::CCMASK_CMP_NE)
-      Invert = !Invert;
-    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+  if (CCNode->getOpcode() == SystemZISD::TM) {
+    if (CCValid != SystemZ::CCMASK_TM)
       return false;
-
-    // Verify that the ICMP compares against one of select values. 
- auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); - if (!TrueVal) - return false; - auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!FalseVal) + auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + auto Result = Op0APVal & Op1APVal; + bool AllOnes = Result == Op1APVal; + bool AllZeros = Result == 0; + bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0; + return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1; + }; + SDValue Op0 = CCNode->getOperand(0); + SDValue Op1 = CCNode->getOperand(1); + auto [Op0CC, Op0CCValid] = findCCUse(Op0); + if (Op0CC == SDValue()) return false; - if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue()) - Invert = !Invert; - else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue()) + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - - // Compute the effective CC mask for the new branch or select. - auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2)); - auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3)); - if (!NewCCValid || !NewCCMask) - return false; - CCValid = NewCCValid->getZExtValue(); - CCMask = NewCCMask->getZExtValue(); - if (Invert) - CCMask ^= CCValid; - - // Return the updated CCReg link. - CCReg = CompareLHS->getOperand(4); + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]); + if (CCVal < 0) + return false; + NewCCMask <<= 1; + NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0; + } + NewCCMask &= Op0CCValid; + CCReg = Op0CC; + CCMask = NewCCMask; + CCValid = Op0CCValid; return true; } + if (CCNode->getOpcode() != SystemZISD::ICMP || + CCValid != SystemZ::CCMASK_ICMP) + return false; - // Optimize the case where CompareRHS is (SRA (SHL (IPM))). - if (CompareLHS->getOpcode() == ISD::SRA) { - auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!SRACount || SRACount->getZExtValue() != 30) - return false; - auto *SHL = CompareLHS->getOperand(0).getNode(); - if (SHL->getOpcode() != ISD::SHL) - return false; - auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); - if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) - return false; - auto *IPM = SHL->getOperand(0).getNode(); - if (IPM->getOpcode() != SystemZISD::IPM) - return false; - - // Avoid introducing CC spills (because SRA would clobber CC). - if (!CompareLHS->hasOneUse()) - return false; - // Verify that the ICMP compares against zero. - if (CompareRHS->getZExtValue() != 0) + SDValue CmpOp0 = CCNode->getOperand(0); + SDValue CmpOp1 = CCNode->getOperand(1); + SDValue CmpOp2 = CCNode->getOperand(2); + auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0); + if (Op0CC != SDValue()) { + const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - // Compute the effective CC mask for the new branch or select. - CCMask = SystemZ::reverseCCMask(CCMask); - - // Return the updated CCReg link. 
-    CCReg = IPM->getOperand(0);
+    auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
+    auto CmpTypeVal = CmpType->getZExtValue();
+    const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
+                                               const SDValue &Op1Val) {
+      auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
+      auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
+      if (!Op0Node || !Op1Node)
+        return -1;
+      auto Op0APVal = Op0Node->getAPIntValue();
+      auto Op1APVal = Op1Node->getAPIntValue();
+      if (CmpTypeVal == SystemZICMP::SignedOnly)
+        return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
+      return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
+    };
+    int NewCCMask = 0;
+    for (auto CC : {0, 1, 2, 3}) {
+      auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
+      if (CCVal < 0)
+        return false;
+      NewCCMask <<= 1;
+      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+    }
+    NewCCMask &= Op0CCValid;
+    CCMask = NewCCMask;
+    CCReg = Op0CC;
+    CCValid = Op0CCValid;
     return true;
   }
 
   return false;
 }
 
-SDValue SystemZTargetLowering::combineBR_CCMASK(
-    SDNode *N, DAGCombinerInfo &DCI) const {
+// Decide between merging conditions and splitting them into multiple branches.
+TargetLoweringBase::CondMergingParams
+SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
+                                                     const Value *Lhs,
+                                                     const Value *Rhs) const {
+  const auto isFlagOutOpCC = [](const Value *V) {
+    using namespace llvm::PatternMatch;
+    const Value *RHSVal;
+    const APInt *RHSC;
+    if (const auto *I = dyn_cast<Instruction>(V)) {
+      // PatternMatch.h provides concise tree-based pattern matching of LLVM IR.
+      if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
+          match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
+        if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
+          if (CB->isInlineAsm()) {
+            const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
+            return IA &&
+                   IA->getConstraintString().find("{@cc}") != std::string::npos;
+          }
+        }
+      }
+    }
+    return false;
+  };
+  // Pattern: (ICmp %asm) or (ICmp (And %asm)).
+  // The longest dependency chain (ICmp, And) has cost 2, so CostThreshold or
+  // BaseCost can be set >= 2. If the instruction cost is <= CostThreshold,
+  // the conditionals are merged; otherwise they are split.
+  if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
+    return {3, 0, -1};
+  // Default.
+  return {-1, -1, -1};
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
   // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 
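
Both the TM and the ICMP paths of the new combineCCMask follow the same scheme: fold each operand down to the constant it takes for every one of the four condition codes, work out which CC the folded comparison would produce, and rebuild the four-bit mask in which bit (3 - CC) stands for condition code CC. The standalone model below sketches only that rebuild loop; the per-CC constants and the equal/less/greater encoding are stand-ins, not the in-tree code:

#include <array>
#include <cstdint>

// OldCCMask: the CCs on which the original comparison branch was taken.
// LHSPerCC/RHSPerCC: the constants the operands fold to under CC = 0..3.
// Returns the mask of producer CCs under which the branch should fire.
static int rebuildCCMask(const std::array<int64_t, 4> &LHSPerCC,
                         const std::array<int64_t, 4> &RHSPerCC,
                         int OldCCMask, int CCValid) {
  int NewCCMask = 0;
  for (int CC = 0; CC < 4; ++CC) {
    // Folded comparison result: 0 = equal, 1 = less, 2 = greater.
    int Folded = LHSPerCC[CC] == RHSPerCC[CC]  ? 0
                 : LHSPerCC[CC] < RHSPerCC[CC] ? 1
                                               : 2;
    NewCCMask <<= 1; // bit (3 - CC) is produced first
    NewCCMask |= (OldCCMask & (1 << (3 - Folded))) != 0;
  }
  return NewCCMask & CCValid; // drop CCs the producer can never set
}

For example, with OldCCMask selecting equality and operands that fold to equal values only under CC 0, the loop returns a mask selecting exactly CC 0, so the branch can test the producer's condition code directly and the intermediate ICMP disappears.
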
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue Chain = N->getOperand(0);
   SDValue CCReg = N->getOperand(4);
-
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+  if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
     return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                        Chain,
                        DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
@@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue CCReg = N->getOperand(4);
 
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
-    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
-                       N->getOperand(0), N->getOperand(1),
-                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
-                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
-                       CCReg);
+  bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
+
+  // Populate the SDVals vector with the value Val takes for each condition
+  // code; Val may itself be another nested select_ccmask on the same CC.
+  const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
+    if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
+      SmallVector<SDValue, 4> Res;
+      if (Val.getOperand(4) != CCReg)
+        return SmallVector<SDValue, 4>{};
+      SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+      auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+      if (!CCMask)
+        return SmallVector<SDValue, 4>{};
+
+      int CCMaskVal = CCMask->getZExtValue();
+      for (auto &CC : {0, 1, 2, 3})
+        Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
+                                                              : FalseVal);
+      return Res;
+    }
+    return SmallVector<SDValue, 4>{Val, Val, Val, Val};
+  };
+  // Attempt to optimize TrueVal/FalseVal of the outermost select_ccmask, using
+  // either the CCReg found by combineCCMask or the original CCReg.
+  SDValue TrueVal = N->getOperand(0);
+  SDValue FalseVal = N->getOperand(1);
+  auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
+  auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
+  // TrueSDVals/FalseSDVals might be empty in case of non-constant
+  // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
+  if (TrueSDVals.empty())
+    TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
+  if (FalseSDVals.empty())
+    FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
+  if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
+    SmallSet<SDValue, 4> MergedSDValsSet;
+    // Ignore CC values outside CCValid.
+    for (auto CC : {0, 1, 2, 3}) {
+      if ((CCValidVal & ((1 << (3 - CC)))) != 0)
+        MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
+                                   ? TrueSDVals[CC]
+                                   : FalseSDVals[CC]);
+    }
+    if (MergedSDValsSet.size() == 1)
+      return *MergedSDValsSet.begin();
+    if (MergedSDValsSet.size() == 2) {
+      auto BeginIt = MergedSDValsSet.begin();
+      SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
+      if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
+        std::swap(NewTrueVal, NewFalseVal);
+      int NewCCMask = 0;
+      for (auto CC : {0, 1, 2, 3}) {
+        NewCCMask <<= 1;
+        NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
+                         ? (TrueSDVals[CC] == NewTrueVal)
+                         : (FalseSDVals[CC] == NewTrueVal);
+      }
+      CCMaskVal = NewCCMask;
+      CCMaskVal &= CCValidVal;
+      TrueVal = NewTrueVal;
+      FalseVal = NewFalseVal;
+      IsCombinedCCReg = true;
+    }
+  }
+
+  if (IsCombinedCCReg)
+    return DAG.getNode(
+        SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
+        FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+        DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
+
   return SDValue();
 }
 
-
 SDValue SystemZTargetLowering::combineGET_CCMASK(
     SDNode *N, DAGCombinerInfo &DCI) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f8706b7..d5b7603 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -533,6 +533,18 @@ public:
   }
 
   const char *getTargetNodeName(unsigned Opcode) const override;
+
+  // Returns the cost parameters for merging the srl/ipm/cc sequence.
+  CondMergingParams
+  getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
+                                const Value *Rhs) const override;
+
+  // Handle lowering of flag output operands in inline assembly.
+  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                      const SDLoc &DL,
+                                      const AsmOperandInfo &Constraint,
+                                      SelectionDAG &DAG) const override;
+
   std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                StringRef Constraint, MVT VT) const override;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 28fa2cd..b81641f 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
 
   getActionDefinitionsBuilder(G_SEXT_INREG).lower();
 
+  getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
+
   // fp constants
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalFor({s32, s64})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3af8b3e..2bf016a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1335,9 +1335,8 @@ def ProcessorFeatures {
                 !listconcat(ARLFeatures, ARLSAdditionalFeatures);
 
   // Pantherlake
-  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
   list<SubtargetFeature> PTLFeatures =
-      !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]);
+      !listremove(ARLSFeatures, [FeatureWIDEKL]);
 
   // Clearwaterforest
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eea84a2..c32b1a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
       match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
       match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
     BaseCost += 1;
+
+  // For OR conditions with EQ comparisons, prefer splitting into branches
+  // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops,
+  // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed
+  // comparisons (SLT, SGT) that can be optimized. 
+ if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or && + match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && + match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) + return {-1, -1, -1}; + return {BaseCost, BrMergingLikelyBias.getValue(), BrMergingUnlikelyBias.getValue()}; } @@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { /// Return true if every element in Mask, is an in-place blend/select mask or is /// undef. -LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) { +[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) { unsigned NumElts = Mask.size(); for (auto [I, M] : enumerate(Mask)) if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts)) @@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, return DstVec; } -LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { +[[maybe_unused]] static bool isHorizOp(unsigned Opcode) { switch (Opcode) { case X86ISD::PACKSS: case X86ISD::PACKUS: @@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // for DAG type consistency we have to match the FP operand type. APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); - LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. @@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // be generated by the memcmp expansion pass with oversized integer compares // (see PR33325). bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); - if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) + if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero) return SDValue(); // Don't perform this combine if constructing the vector will be expensive. diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index edca7c1..1932a3a 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -175,7 +175,7 @@ constexpr FeatureBitset FeaturesArrowlakeS = FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = - (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI; + (FeaturesArrowlakeS ^ FeatureWIDEKL); constexpr FeatureBitset FeaturesClearwaterforest = (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 6b67b48..09cb225 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2979,10 +2979,14 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op, "Op must be either i1 or vector of i1."); if (SI.getCondition()->getType() != Op->getType()) return nullptr; - if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) - return SelectInst::Create(Op, - IsAnd ? V : ConstantInt::getTrue(Op->getType()), - IsAnd ? 
ConstantInt::getFalse(Op->getType()) : V); + if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) { + Instruction *MDFrom = nullptr; + if (!ProfcheckDisableMetadataFixes) + MDFrom = &SI; + return SelectInst::Create( + Op, IsAnd ? V : ConstantInt::getTrue(Op->getType()), + IsAnd ? ConstantInt::getFalse(Op->getType()) : V, "", nullptr, MDFrom); + } return nullptr; } diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 860f8f7..511bca4 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -3337,7 +3337,7 @@ PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, Value *ValueIfFalse) { - PHINode *PHI = IRB.CreatePHI(IntptrTy, 2); + PHINode *PHI = IRB.CreatePHI(ValueIfTrue->getType(), 2); BasicBlock *CondBlock = cast<Instruction>(Cond)->getParent(); PHI->addIncoming(ValueIfFalse, CondBlock); BasicBlock *ThenBlock = ThenTerm->getParent(); @@ -3360,7 +3360,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout( assert((ClRealignStack & (ClRealignStack - 1)) == 0); uint64_t FrameAlignment = std::max(L.FrameAlignment, uint64_t(ClRealignStack)); Alloca->setAlignment(Align(FrameAlignment)); - return IRB.CreatePointerCast(Alloca, IntptrTy); + return Alloca; } void FunctionStackPoisoner::createDynamicAllocasInitStorage() { @@ -3572,10 +3572,12 @@ void FunctionStackPoisoner::processStaticAllocas() { DoDynamicAlloca &= !HasInlineAsm && !HasReturnsTwiceCall; DoStackMalloc &= !HasInlineAsm && !HasReturnsTwiceCall; + Type *PtrTy = F.getDataLayout().getAllocaPtrType(F.getContext()); Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); - Value *FakeStack; + Value *FakeStackPtr; + Value *FakeStackInt; Value *LocalStackBase; Value *LocalStackBaseAlloca; uint8_t DIExprFlags = DIExpression::ApplyOffset; @@ -3603,20 +3605,21 @@ void FunctionStackPoisoner::processStaticAllocas() { RTCI.createRuntimeCall(IRBIf, AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); - FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, - ConstantInt::get(IntptrTy, 0)); + FakeStackInt = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, + Term, ConstantInt::get(IntptrTy, 0)); } else { // assert(ASan.UseAfterReturn == AsanDetectStackUseAfterReturnMode:Always) // void *FakeStack = __asan_stack_malloc_N(LocalStackSize); // void *LocalStackBase = (FakeStack) ? FakeStack : // alloca(LocalStackSize); StackMallocIdx = StackMallocSizeClass(LocalStackSize); - FakeStack = + FakeStackInt = RTCI.createRuntimeCall(IRB, AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); } + FakeStackPtr = IRB.CreateIntToPtr(FakeStackInt, PtrTy); Value *NoFakeStack = - IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy)); + IRB.CreateICmpEQ(FakeStackInt, Constant::getNullValue(IntptrTy)); Instruction *Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false); IRBuilder<> IRBIf(Term); @@ -3624,67 +3627,53 @@ void FunctionStackPoisoner::processStaticAllocas() { DoDynamicAlloca ? 
createAllocaForLayout(IRBIf, L, true) : StaticAlloca; IRB.SetInsertPoint(InsBefore); - LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); + LocalStackBase = + createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStackPtr); IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca); DIExprFlags |= DIExpression::DerefBefore; } else { // void *FakeStack = nullptr; // void *LocalStackBase = alloca(LocalStackSize); - FakeStack = ConstantInt::get(IntptrTy, 0); + FakeStackInt = Constant::getNullValue(IntptrTy); + FakeStackPtr = Constant::getNullValue(PtrTy); LocalStackBase = DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca; LocalStackBaseAlloca = LocalStackBase; } - // It shouldn't matter whether we pass an `alloca` or a `ptrtoint` as the - // dbg.declare address opereand, but passing a `ptrtoint` seems to confuse - // later passes and can result in dropped variable coverage in debug info. - Value *LocalStackBaseAllocaPtr = - isa<PtrToIntInst>(LocalStackBaseAlloca) - ? cast<PtrToIntInst>(LocalStackBaseAlloca)->getPointerOperand() - : LocalStackBaseAlloca; - assert(isa<AllocaInst>(LocalStackBaseAllocaPtr) && - "Variable descriptions relative to ASan stack base will be dropped"); - // Replace Alloca instructions with base+offset. SmallVector<Value *> NewAllocaPtrs; for (const auto &Desc : SVD) { AllocaInst *AI = Desc.AI; - replaceDbgDeclare(AI, LocalStackBaseAllocaPtr, DIB, DIExprFlags, - Desc.Offset); - Value *NewAllocaPtr = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), - AI->getType()); + replaceDbgDeclare(AI, LocalStackBaseAlloca, DIB, DIExprFlags, Desc.Offset); + Value *NewAllocaPtr = IRB.CreatePtrAdd( + LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)); AI->replaceAllUsesWith(NewAllocaPtr); NewAllocaPtrs.push_back(NewAllocaPtr); } // The left-most redzone has enough space for at least 4 pointers. // Write the Magic value to redzone[0]. - Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), - BasePlus0); + LocalStackBase); // Write the frame description constant to redzone[1]. - Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, ASan.LongSize / 8)), - IntptrPtrTy); + Value *BasePlus1 = IRB.CreatePtrAdd( + LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize / 8)); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), DescriptionString, /*AllowMerging*/ true, genName("stack")); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Write the PC to redzone[2]. - Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), - IntptrPtrTy); + Value *BasePlus2 = IRB.CreatePtrAdd( + LocalStackBase, ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)); IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); // Poison the stack red zones at the entry. - Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); + Value *ShadowBase = + ASan.memToShadow(IRB.CreatePtrToInt(LocalStackBase, IntptrTy), IRB); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. 
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); @@ -3723,7 +3712,7 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBuilder<> IRBRet(Ret); // Mark the current frame as retired. IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), - BasePlus0); + LocalStackBase); if (DoStackMalloc) { assert(StackMallocIdx >= 0); // if FakeStack != 0 // LocalStackBase == FakeStack @@ -3737,7 +3726,7 @@ void FunctionStackPoisoner::processStaticAllocas() { // else // <This is not a fake stack; unpoison the redzones> Value *Cmp = - IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy)); + IRBRet.CreateICmpNE(FakeStackInt, Constant::getNullValue(IntptrTy)); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm); @@ -3748,11 +3737,10 @@ void FunctionStackPoisoner::processStaticAllocas() { kAsanStackUseAfterReturnMagic); copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison, ShadowBase); - Value *SavedFlagPtrPtr = IRBPoison.CreateAdd( - FakeStack, + Value *SavedFlagPtrPtr = IRBPoison.CreatePtrAdd( + FakeStackPtr, ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8)); - Value *SavedFlagPtr = IRBPoison.CreateLoad( - IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy)); + Value *SavedFlagPtr = IRBPoison.CreateLoad(IntptrTy, SavedFlagPtrPtr); IRBPoison.CreateStore( Constant::getNullValue(IRBPoison.getInt8Ty()), IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getPtrTy())); @@ -3760,7 +3748,7 @@ void FunctionStackPoisoner::processStaticAllocas() { // For larger frames call __asan_stack_free_*. RTCI.createRuntimeCall( IRBPoison, AsanStackFreeFunc[StackMallocIdx], - {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)}); + {FakeStackInt, ConstantInt::get(IntptrTy, LocalStackSize)}); } IRBuilder<> IRBElse(ElseTerm); diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index ff5f390..66e45ec 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -266,8 +266,7 @@ void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI, if (!ProfcheckDisableMetadataFixes) BI->setMetadata(LLVMContext::MD_prof, SI->getMetadata(LLVMContext::MD_prof)); - DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, - {DominatorTree::Insert, StartBlock, NewBlock}}); + DTU->applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}}); } else { BasicBlock *EndBlock = SIUse->getParent(); BasicBlock *NewBlockT = BasicBlock::Create( @@ -1479,10 +1478,13 @@ bool DFAJumpThreading::run(Function &F) { DTU->flush(); #ifdef EXPENSIVE_CHECKS - assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)); verifyFunction(F, &dbgs()); #endif + if (MadeChanges && VerifyDomInfo) + assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full) && + "Failed to maintain validity of domtree!"); + return MadeChanges; } diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp index 213d0f3..1335665 100644 --- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -39,10 +39,11 @@ public: private: AliasAnalysis *AA; }; +} // namespace /// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function, /// iterating until no more changes are made. 
-bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { +static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { bool Changed = false; bool LocalChange = true; @@ -67,7 +68,6 @@ bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { } return Changed; } -} // namespace char FlattenCFGLegacyPass::ID = 0; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index b9d332b..d2f09e9 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -118,9 +118,13 @@ STATISTIC( STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +namespace llvm { /// Disable running mem2reg during SROA in order to test or debug SROA. static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden); +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // namespace llvm + namespace { class AllocaSliceRewriter; @@ -1777,7 +1781,8 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, } Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, - LI.getName() + ".sroa.speculated"); + LI.getName() + ".sroa.speculated", + ProfcheckDisableMetadataFixes ? nullptr : &SI); LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI.replaceAllUsesWith(V); @@ -4360,10 +4365,13 @@ private: }; Value *Cond, *True, *False; + Instruction *MDFrom = nullptr; if (auto *SI = dyn_cast<SelectInst>(Sel)) { Cond = SI->getCondition(); True = SI->getTrueValue(); False = SI->getFalseValue(); + if (!ProfcheckDisableMetadataFixes) + MDFrom = SI; } else { Cond = Sel->getOperand(0); True = ConstantInt::get(Sel->getType(), 1); @@ -4383,8 +4391,12 @@ private: IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(), False->getName() + ".sroa.gep", NW); - Value *NSel = - IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel"); + Value *NSel = MDFrom + ? IRB.CreateSelect(Cond, NTrue, NFalse, + Sel->getName() + ".sroa.sel", MDFrom) + : IRB.CreateSelectWithUnknownProfile( + Cond, NTrue, NFalse, DEBUG_TYPE, + Sel->getName() + ".sroa.sel"); Visited.erase(&GEPI); GEPI.replaceAllUsesWith(NSel); GEPI.eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2ee91a9..0f3978f 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include <cassert> #include <utility> @@ -321,7 +322,7 @@ class StructurizeCFG { void collectInfos(); - void insertConditions(bool Loops); + void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter); void simplifyConditions(); @@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() { } /// Insert the missing branch conditions -void StructurizeCFG::insertConditions(bool Loops) { +void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) { BranchVector &Conds = Loops ? LoopConds : Conditions; Value *Default = Loops ? BoolTrue : BoolFalse; - SSAUpdater PhiInserter; for (BranchInst *Term : Conds) { assert(Term->isConditional()); @@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) { BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(Loops ? 
SuccFalse : Parent, Default); + unsigned Variable = PhiInserter.AddVariable("", Boolean); + PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent, + Default); BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; @@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) { ParentInfo = PI; break; } - PhiInserter.AddAvailableValue(BB, PI.Pred); + PhiInserter.AddAvailableValue(Variable, BB, PI.Pred); Dominator.addAndRememberBlock(BB); } @@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) { CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); } else { if (!Dominator.resultIsRememberedBlock()) - PhiInserter.AddAvailableValue(Dominator.result(), Default); + PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default); - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + PhiInserter.AddUse(Variable, &Term->getOperandUse(0)); } } } @@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT, orderNodes(); collectInfos(); createFlow(); - insertConditions(false); - insertConditions(true); + + SSAUpdaterBulk PhiInserter; + insertConditions(false, PhiInserter); + insertConditions(true, PhiInserter); + PhiInserter.RewriteAndOptimizeAllUses(*DT); + setPhiValues(); simplifyHoistedPhis(); simplifyConditions(); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 3ce569f..32924e7 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -826,7 +826,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (const auto &[Pred, Count] : PredCount) { - for (unsigned _ : llvm::seq<unsigned>(Count)) + for ([[maybe_unused]] unsigned _ : llvm::seq<unsigned>(Count)) PN->removeIncomingValue(Pred, false); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8ca3bed..23f5623 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -24,12 +24,9 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H -#include "VPlanAnalysis.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" @@ -41,10 +38,11 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InstructionCost.h" -#include <algorithm> #include <cassert> #include <cstddef> +#include <functional> #include <string> +#include <utility> namespace llvm { @@ -346,13 +344,6 @@ public: /// Return the cost of the block. virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; - /// Return true if it is legal to hoist instructions into this block. - bool isLegalToHoistInto() { - // There are currently no constraints that prevent an instruction to be - // hoisted into a VPBlockBase. 
- return true; - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); diff --git a/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll b/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll new file mode 100644 index 0000000..49fbad3 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 | FileCheck %s + +; Test for function isKnownLessThan that calculates a back-edge taken count, +; which can return a CouldNotCompute SCEV. + +define void @test(i64 %conv, ptr %a) { +; CHECK-LABEL: 'test' +; CHECK-NEXT: Src: %ld = load i32, ptr %arrayidx12, align 4 --> Dst: %ld = load i32, ptr %arrayidx12, align 4 +; CHECK-NEXT: da analyze - none! +; +entry: + %sub = add i64 %conv, 1 + br label %loop + +loop: + %i = phi i64 [ %add26, %loop ], [ 0, %entry ] + %arrayidx12 = getelementptr i32, ptr %a, i64 %i + %ld = load i32, ptr %arrayidx12, align 4 + %add26 = add nsw i64 %sub, %i + br label %loop +} diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index acac2c9..0c1f37b 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -382,7 +382,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -393,7 +393,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -406,9 +406,9 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to 
i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -419,7 +419,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -459,7 +459,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -470,7 +470,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop 
%bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -483,9 +483,9 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -496,7 +496,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir new file mode 100644 index 0000000..824ada1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir @@ -0,0 +1,278 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s + +--- +name: Cst +body: | + bb.1: + ; CHECK-LABEL: name: @Cst + ; CHECK-NEXT: %0:_ KnownBits:00000010 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00011000 SignBits:3 + ; CHECK-NEXT: %2:_ KnownBits:00011010 SignBits:3 + %0:_(s8) = G_CONSTANT i8 2 + %1:_(s8) = G_CONSTANT i8 24 + %2:_(s8) = G_ADD %0, %1 +... 
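The `Cst` expectations above can be verified by hand: for a `G_ADD` of two fully-known constants every result bit is known, and the reported SignBits count is the number of identical leading bits. A minimal worked check (plain i8 binary arithmetic, mirroring the values in the test):

```
  00000010   # %0 = G_CONSTANT i8 2   -> six leading zeros, SignBits:6
+ 00011000   # %1 = G_CONSTANT i8 24  -> three leading zeros, SignBits:3
----------
  00011010   # %2 = 26, all bits known, three leading zeros, SignBits:3
```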
+--- +name: CstZero +body: | + bb.1: + ; CHECK-LABEL: name: @CstZero + ; CHECK-NEXT: %0:_ KnownBits:00000001 SignBits:7 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:00000000 SignBits:8 + %0:_(s8) = G_CONSTANT i8 1 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstNegOne +body: | + bb.1: + ; CHECK-LABEL: name: @CstNegOne + ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:11111111 SignBits:8 + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstSeven +body: | + bb.1: + ; CHECK-LABEL: name: @CstSeven + ; CHECK-NEXT: %0:_ KnownBits:00001000 SignBits:4 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:00000111 SignBits:5 + %0:_(s8) = G_CONSTANT i8 8 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstNeg +body: | + bb.1: + ; CHECK-LABEL: name: @CstNeg + ; CHECK-NEXT: %0:_ KnownBits:11100000 SignBits:3 + ; CHECK-NEXT: %1:_ KnownBits:00000010 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:11100010 SignBits:3 + %0:_(s8) = G_CONSTANT i8 224 + %1:_(s8) = G_CONSTANT i8 2 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarVar +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarVar + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = COPY $b1 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarRhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarRhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarNonNegative +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarNonNegative + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4 + ; CHECK-NEXT: %3:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:4 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 15 + %2:_(s8) = G_AND %0, %1 + %3:_(s8) = G_CONSTANT i8 255 + %4:_(s8) = G_ADD %2, %3 +... +--- +name: ScalarLhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarLhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ADD %1, %0 +... +--- +name: ScalarPartKnown +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarPartKnown + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4 + ; CHECK-NEXT: %3:_ KnownBits:00000101 SignBits:5 + ; CHECK-NEXT: %4:_ KnownBits:000????? SignBits:3 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 15 + %2:_(s8) = G_AND %0, %1 + %3:_(s8) = G_CONSTANT i8 5 + %4:_(s8) = G_ADD %2, %3 +... 
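`ScalarPartKnown` above is the interesting scalar case: a constant 5 is added to a value whose low nibble is unknown. The sum ranges over 5..20, so carries can reach bit 4 and only the top three bits stay known zero, which is exactly the `000?????`/SignBits:3 the test expects. A hand check with the same i8 values:

```
  0000????   # %2 = %0 & 15, low nibble unknown, value in [0,15]
+ 00000101   # %3 = G_CONSTANT i8 5
----------
  000?????   # %4 in [5,20]; 20 = 00010100, so bits 4..0 vary, bits 7..5 stay 0
```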
+--- +name: VectorCstZero +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCstZero + ; CHECK-NEXT: %0:_ KnownBits:0000000000000001 SignBits:15 + ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000001 SignBits:15 + ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %4:_ KnownBits:0000000000000000 SignBits:16 + %0:_(s16) = G_CONSTANT i16 1 + %1:_(s16) = G_CONSTANT i16 65535 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %4:_(<4 x s16>) = G_ADD %2, %3 +... +--- +name: VectorCstNegOne +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCstNegOne + ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16 + %0:_(s16) = G_CONSTANT i16 0 + %1:_(s16) = G_CONSTANT i16 65535 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %4:_(<4 x s16>) = G_ADD %2, %3 +... +--- +name: VectorVar +body: | + bb.1: + ; CHECK-LABEL: name: @VectorVar + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = COPY $d1 + %2:_(<4 x s16>) = G_ADD %0, %1 +... +--- +name: VectorRhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @VectorRhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ADD %2, %0 +... +--- +name: VectorNonNegative +body: | + bb.1: + ; CHECK-LABEL: name: @VectorNonNegative + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %5:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %6:_ KnownBits:???????????????? SignBits:8 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 255 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_AND %0, %2 + %4:_(s16) = G_CONSTANT i16 65535 + %5:_(<4 x s16>) = G_BUILD_VECTOR %4, %4, %4, %4 + %6:_(<4 x s16>) = G_ADD %3, %5 +... +--- +name: VectorLhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @VectorLhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ADD %0, %2 +... +--- +name: VectorPartKnown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorPartKnown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? 
SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:0000000000101010 SignBits:10 + ; CHECK-NEXT: %5:_ KnownBits:0000000001001010 SignBits:9 + ; CHECK-NEXT: %6:_ KnownBits:000000000??01010 SignBits:9 + ; CHECK-NEXT: %7:_ KnownBits:0000000????????? SignBits:7 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 255 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_AND %0, %2 + %4:_(s16) = G_CONSTANT i16 42 + %5:_(s16) = G_CONSTANT i16 74 + %6:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %5, %4 + %7:_(<4 x s16>) = G_ADD %6, %3 +... +--- +name: VectorCst36 +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst36 + ; CHECK-NEXT: %0:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000110 SignBits:13 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %3:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %4:_ KnownBits:000000000000???? SignBits:12 + %0:_(s16) = G_CONSTANT i16 3 + %1:_(s16) = G_CONSTANT i16 6 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0 + %4:_(<4 x s16>) = G_ADD %2, %3 +... + +--- +name: VectorCst3unknown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst3unknown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = COPY $h0 + %2:_(s16) = G_CONSTANT i16 3 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1 + %4:_(<4 x s16>) = G_ADD %0, %3 +... diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index dc88f94..cca190f 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -1774,3 +1774,88 @@ define i128 @combine_i128_sdiv_const100(i128 %x) { %1 = sdiv i128 %x, 100 ret i128 %1 } + +; The following only becomes an sdiv_by_one after type legalisation, after which +; the splatted scalar constant has a different type to the splat vector. This +; test verifies DAGCombiner does not care about this type difference. 
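For contrast with the obfuscated test that follows, when the divisor is visibly a splat of 1 before legalisation the whole division folds away and SelectionDAG emits just `ret`. A hedged sketch of that plain form (the function name is illustrative, not part of the test file):

```llvm
; sdiv by a splat of the constant 1 is the identity, so the division
; disappears before instruction selection.
define <16 x i16> @sdiv_by_one_plain(<16 x i16> %x) {
  %div = sdiv <16 x i16> %x, splat (i16 1)
  ret <16 x i16> %div   ; folds to: ret <16 x i16> %x
}
```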
+define <16 x i16> @combine_vec_sdiv_by_one_obfuscated(<16 x i16> %x) "target-features"="+sve" { +; CHECK-SD-LABEL: combine_vec_sdiv_by_one_obfuscated: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: combine_vec_sdiv_by_one_obfuscated: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: movi v3.8h, #1 +; CHECK-GI-NEXT: smov w8, v0.h[0] +; CHECK-GI-NEXT: mov v3.h[0], v2.h[0] +; CHECK-GI-NEXT: smov w9, v3.h[0] +; CHECK-GI-NEXT: smov w16, v3.h[7] +; CHECK-GI-NEXT: sdiv w14, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[1] +; CHECK-GI-NEXT: smov w9, v3.h[1] +; CHECK-GI-NEXT: sdiv w15, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[2] +; CHECK-GI-NEXT: smov w9, v3.h[2] +; CHECK-GI-NEXT: sdiv w13, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[3] +; CHECK-GI-NEXT: smov w9, v3.h[3] +; CHECK-GI-NEXT: sdiv w12, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[4] +; CHECK-GI-NEXT: smov w9, v3.h[4] +; CHECK-GI-NEXT: sdiv w11, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[5] +; CHECK-GI-NEXT: smov w9, v3.h[5] +; CHECK-GI-NEXT: sdiv w10, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[6] +; CHECK-GI-NEXT: smov w9, v3.h[6] +; CHECK-GI-NEXT: movi v3.8h, #1 +; CHECK-GI-NEXT: smov w17, v3.h[0] +; CHECK-GI-NEXT: smov w18, v3.h[1] +; CHECK-GI-NEXT: smov w0, v3.h[2] +; CHECK-GI-NEXT: smov w1, v3.h[3] +; CHECK-GI-NEXT: smov w2, v3.h[4] +; CHECK-GI-NEXT: smov w3, v3.h[5] +; CHECK-GI-NEXT: sdiv w8, w8, w9 +; CHECK-GI-NEXT: smov w9, v0.h[7] +; CHECK-GI-NEXT: fmov s0, w14 +; CHECK-GI-NEXT: mov v0.h[1], w15 +; CHECK-GI-NEXT: smov w15, v1.h[6] +; CHECK-GI-NEXT: mov v0.h[2], w13 +; CHECK-GI-NEXT: sdiv w9, w9, w16 +; CHECK-GI-NEXT: smov w16, v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], w12 +; CHECK-GI-NEXT: smov w12, v1.h[7] +; CHECK-GI-NEXT: mov v0.h[4], w11 +; CHECK-GI-NEXT: sdiv w16, w16, w17 +; CHECK-GI-NEXT: smov w17, v1.h[1] +; CHECK-GI-NEXT: mov v0.h[5], w10 +; CHECK-GI-NEXT: mov v0.h[6], w8 +; CHECK-GI-NEXT: sdiv w17, w17, w18 +; CHECK-GI-NEXT: smov w18, v1.h[2] +; CHECK-GI-NEXT: fmov s2, w16 +; CHECK-GI-NEXT: smov w16, v3.h[6] +; CHECK-GI-NEXT: mov v0.h[7], w9 +; CHECK-GI-NEXT: sdiv w18, w18, w0 +; CHECK-GI-NEXT: smov w0, v1.h[3] +; CHECK-GI-NEXT: mov v2.h[1], w17 +; CHECK-GI-NEXT: sdiv w0, w0, w1 +; CHECK-GI-NEXT: smov w1, v1.h[4] +; CHECK-GI-NEXT: mov v2.h[2], w18 +; CHECK-GI-NEXT: sdiv w1, w1, w2 +; CHECK-GI-NEXT: smov w2, v1.h[5] +; CHECK-GI-NEXT: mov v2.h[3], w0 +; CHECK-GI-NEXT: sdiv w14, w2, w3 +; CHECK-GI-NEXT: mov v2.h[4], w1 +; CHECK-GI-NEXT: sdiv w13, w15, w16 +; CHECK-GI-NEXT: smov w15, v3.h[7] +; CHECK-GI-NEXT: mov v2.h[5], w14 +; CHECK-GI-NEXT: sdiv w10, w12, w15 +; CHECK-GI-NEXT: mov v2.h[6], w13 +; CHECK-GI-NEXT: mov v2.h[7], w10 +; CHECK-GI-NEXT: mov v1.16b, v2.16b +; CHECK-GI-NEXT: ret + %zero_and_ones = shufflevector <16 x i16> zeroinitializer, <16 x i16> splat (i16 1), <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + %div = sdiv <16 x i16> %x, %zero_and_ones + ret <16 x i16> %div +} diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll index b6dee97e..b8d6c88 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll @@ -732,6 +732,247 @@ exit: ret void } +; This example corresponds to: +; +; __arm_agnostic("sme_za_state") void try_catch_agnostic_za_invoke() +; { +; try { +; agnostic_za_call(); +; } catch(...) 
{ +; } +; } +; +; In this example we preserve all SME state enabled by PSTATE.ZA using +; `__arm_sme_save` before agnostic_za_call(). This is because on all normal +; returns from an agnostic ZA function ZA state should be preserved. That means +; we need to make sure ZA state is saved in case agnostic_za_call() throws, and +; we need to restore ZA state after unwinding to the catch block. + +define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: try_catch_agnostic_za_invoke: +; CHECK: .Lfunc_begin5: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 28, .Lexception5 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: bl __arm_sme_state_size +; CHECK-NEXT: sub sp, sp, x0 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .Ltmp15: // EH_LABEL +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_sme_save +; CHECK-NEXT: bl agnostic_za_call +; CHECK-NEXT: .Ltmp16: // EH_LABEL +; CHECK-NEXT: .LBB5_1: // %exit +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_sme_restore +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %catch +; CHECK-NEXT: .Ltmp17: // EH_LABEL +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: b .LBB5_1 +; +; CHECK-SDAG-LABEL: try_catch_agnostic_za_invoke: +; CHECK-SDAG: .Lfunc_begin5: +; CHECK-SDAG-NEXT: .cfi_startproc +; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception5 +; CHECK-SDAG-NEXT: // %bb.0: // %entry +; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: mov x29, sp +; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 +; CHECK-SDAG-NEXT: .cfi_offset w19, -16 +; CHECK-SDAG-NEXT: .cfi_offset w30, -24 +; CHECK-SDAG-NEXT: .cfi_offset w29, -32 +; CHECK-SDAG-NEXT: bl __arm_sme_state_size +; CHECK-SDAG-NEXT: sub sp, sp, x0 +; CHECK-SDAG-NEXT: mov x19, sp +; CHECK-SDAG-NEXT: .Ltmp15: // EH_LABEL +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: bl agnostic_za_call +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: .Ltmp16: // EH_LABEL +; CHECK-SDAG-NEXT: .LBB5_1: // %exit +; CHECK-SDAG-NEXT: mov sp, x29 +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-SDAG-NEXT: ret +; CHECK-SDAG-NEXT: .LBB5_2: // %catch +; CHECK-SDAG-NEXT: .Ltmp17: // EH_LABEL +; CHECK-SDAG-NEXT: mov x1, x0 +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: mov x0, x1 +; CHECK-SDAG-NEXT: bl __cxa_begin_catch +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: bl __cxa_end_catch +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: b .LBB5_1 +entry: + invoke void @agnostic_za_call() + to label %exit unwind label %catch + +catch: + %eh_info = landingpad { ptr, i32 } + catch ptr null + %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0 + tail call ptr @__cxa_begin_catch(ptr %exception_ptr) + tail call void @__cxa_end_catch() + br label %exit + +exit: + ret void +} + +; This is the same `try_catch_agnostic_za_invoke`, but shows a lazy save would +; also need to be committed in a shared-ZA function calling an agnostic-ZA function. +define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: try_catch_inout_za_agnostic_za_callee: +; CHECK: .Lfunc_begin6: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 28, .Lexception6 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: msub x9, x8, x8, x9 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stp x9, x8, [x29, #-16] +; CHECK-NEXT: .Ltmp18: // EH_LABEL +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl agnostic_za_call +; CHECK-NEXT: .Ltmp19: // EH_LABEL +; CHECK-NEXT: .LBB6_1: // %exit +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: cbnz x8, .LBB6_3 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB6_3: // %exit +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_4: // %catch +; CHECK-NEXT: .Ltmp20: // EH_LABEL +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: b .LBB6_1 +; +; CHECK-SDAG-LABEL: try_catch_inout_za_agnostic_za_callee: +; CHECK-SDAG: .Lfunc_begin6: +; CHECK-SDAG-NEXT: .cfi_startproc +; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception6 +; CHECK-SDAG-NEXT: // %bb.0: // %entry +; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: mov x29, sp +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 +; CHECK-SDAG-NEXT: .cfi_offset w19, -16 +; CHECK-SDAG-NEXT: .cfi_offset w30, -24 +; CHECK-SDAG-NEXT: .cfi_offset w29, -32 +; CHECK-SDAG-NEXT: rdsvl x8, #1 +; CHECK-SDAG-NEXT: mov x9, sp +; CHECK-SDAG-NEXT: msub x9, x8, x8, x9 +; CHECK-SDAG-NEXT: mov sp, x9 +; CHECK-SDAG-NEXT: stp x9, x8, [x29, #-16] +; CHECK-SDAG-NEXT: .Ltmp18: // EH_LABEL +; CHECK-SDAG-NEXT: sub x19, x29, #16 +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl agnostic_za_call +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_2 +; CHECK-SDAG-NEXT: // %bb.1: // %entry +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_2: // %entry +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: .Ltmp19: // EH_LABEL +; CHECK-SDAG-NEXT: .LBB6_3: // %exit +; CHECK-SDAG-NEXT: mov sp, x29 +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-SDAG-NEXT: ret +; CHECK-SDAG-NEXT: .LBB6_4: // %catch +; CHECK-SDAG-NEXT: .Ltmp20: // EH_LABEL +; CHECK-SDAG-NEXT: mov x1, x0 +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_6 +; CHECK-SDAG-NEXT: // %bb.5: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_6: // %catch +; CHECK-SDAG-NEXT: mov x0, x1 +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl __cxa_begin_catch +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_8 +; CHECK-SDAG-NEXT: // %bb.7: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_8: // %catch +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl 
__cxa_end_catch +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_10 +; CHECK-SDAG-NEXT: // %bb.9: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_10: // %catch +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: b .LBB6_3 +entry: + invoke void @agnostic_za_call() + to label %exit unwind label %catch + +catch: + %eh_info = landingpad { ptr, i32 } + catch ptr null + %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0 + tail call ptr @__cxa_begin_catch(ptr %exception_ptr) + tail call void @__cxa_end_catch() + br label %exit + +exit: + ret void +} + declare ptr @__cxa_allocate_exception(i64) declare void @__cxa_throw(ptr, ptr, ptr) declare ptr @__cxa_begin_catch(ptr) @@ -742,3 +983,4 @@ declare void @may_throw() declare void @shared_za_call() "aarch64_inout_za" declare void @noexcept_shared_za_call() "aarch64_inout_za" declare void @shared_zt0_call() "aarch64_inout_zt0" +declare void @agnostic_za_call() "aarch64_za_state_agnostic" diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll index 0f7e0c7..1325abf 100644 --- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll +++ b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll @@ -95,3 +95,80 @@ declare i4 @llvm.ctpop.i4(i4) #1 !6 = !{!"short", !7, i64 0} !7 = !{!"omnipotent char", !8, i64 0} !8 = !{!"Simple C/C++ TBAA"} + +; Function to lockdown changes for floating point vector comparisons +define range(i32 0, 5) i32 @cols_needed(ptr %colauths){ +; POWERPC_64LE-LABEL: cols_needed: +; POWERPC_64LE: # %bb.0: # %entry +; POWERPC_64LE-NEXT: lxv vs0, 0(r3) +; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_64LE-NEXT: li r4, 4 +; POWERPC_64LE-NEXT: li r3, 0 +; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0 +; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 +; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2 +; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30 +; POWERPC_64LE-NEXT: sub r3, r4, r3 +; POWERPC_64LE-NEXT: mfvsrwz r4, v2 +; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29 +; POWERPC_64LE-NEXT: or r3, r3, r4 +; POWERPC_64LE-NEXT: li r4, 12 +; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 +; POWERPC_64LE-NEXT: slwi r4, r4, 3 +; POWERPC_64LE-NEXT: or r3, r3, r4 +; POWERPC_64LE-NEXT: clrlwi r3, r3, 28 +; POWERPC_64LE-NEXT: stb r3, -1(r1) +; POWERPC_64LE-NEXT: lbz r3, -1(r1) +; POWERPC_64LE-NEXT: popcntd r3, r3 +; POWERPC_64LE-NEXT: blr +; +; POWERPC_64-LABEL: cols_needed: +; POWERPC_64: # %bb.0: # %entry +; POWERPC_64-NEXT: lxv vs0, 0(r3) +; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_64-NEXT: li r4, 8 +; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_64-NEXT: xxlnor v2, vs0, vs0 +; POWERPC_64-NEXT: vextuwlx r4, r4, v2 +; POWERPC_64-NEXT: mfvsrwz r3, v2 +; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30 +; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29 +; POWERPC_64-NEXT: li r3, 0 +; POWERPC_64-NEXT: vextuwlx r3, r3, v2 +; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28 +; POWERPC_64-NEXT: li r3, 12 +; POWERPC_64-NEXT: vextuwlx r3, r3, v2 +; POWERPC_64-NEXT: sub r3, r4, r3 +; POWERPC_64-NEXT: clrlwi r3, r3, 28 +; POWERPC_64-NEXT: stb r3, -1(r1) +; POWERPC_64-NEXT: lbz r3, -1(r1) +; POWERPC_64-NEXT: popcntd r3, r3 +; POWERPC_64-NEXT: blr +; +; POWERPC_32-LABEL: cols_needed: +; POWERPC_32: # %bb.0: # %entry +; POWERPC_32-NEXT: lxv vs0, 0(r3) +; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_32-NEXT: xxlnor 
vs0, vs0, vs0 +; POWERPC_32-NEXT: stxv vs0, -32(r1) +; POWERPC_32-NEXT: lwz r3, -24(r1) +; POWERPC_32-NEXT: lwz r4, -28(r1) +; POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30 +; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29 +; POWERPC_32-NEXT: lwz r4, -32(r1) +; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28 +; POWERPC_32-NEXT: lwz r4, -20(r1) +; POWERPC_32-NEXT: sub r3, r3, r4 +; POWERPC_32-NEXT: clrlwi r3, r3, 28 +; POWERPC_32-NEXT: popcntw r3, r3 +; POWERPC_32-NEXT: blr +entry: + %0 = load <4 x float>, ptr %colauths, align 4, !tbaa !5 + %1 = fcmp une <4 x float> %0, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2) + %4 = zext nneg i4 %3 to i32 + ret i32 %4 +} diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll index 2a46a59..4f036d3 100644 --- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll +++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll @@ -221,8 +221,8 @@ define i64 @test12(i64 %0) #0 { ; ; RV64-LABEL: test12: ; RV64: # %bb.0: # %entry -; RV64-NEXT: addiw a0, a0, -16 -; RV64-NEXT: addi a0, a0, 13 +; RV64-NEXT: addi a0, a0, -16 +; RV64-NEXT: addiw a0, a0, 13 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/i64-icmp.ll b/llvm/test/CodeGen/RISCV/i64-icmp.ll index 88d989d..2742b9a 100644 --- a/llvm/test/CodeGen/RISCV/i64-icmp.ll +++ b/llvm/test/CodeGen/RISCV/i64-icmp.ll @@ -708,8 +708,7 @@ define i64 @icmp_sle_constant_neg_2050(i64 %a) nounwind { define i64 @icmp_eq_zext_inreg_small_constant(i64 %a) nounwind { ; RV64I-LABEL: icmp_eq_zext_inreg_small_constant: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -123 +; RV64I-NEXT: addiw a0, a0, -123 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = and i64 %a, 4294967295 @@ -748,8 +747,7 @@ define i64 @icmp_ne_zext_inreg_small_constant(i64 %a) nounwind { define i64 @icmp_ne_zext_inreg_large_constant(i64 %a) nounwind { ; RV64I-LABEL: icmp_ne_zext_inreg_large_constant: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, 2 +; RV64I-NEXT: addiw a0, a0, 2 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: ret %1 = and i64 %a, 4294967295 diff --git a/llvm/test/CodeGen/RISCV/min-max.ll b/llvm/test/CodeGen/RISCV/min-max.ll index acde8ad..e7f6899 100644 --- a/llvm/test/CodeGen/RISCV/min-max.ll +++ b/llvm/test/CodeGen/RISCV/min-max.ll @@ -5,6 +5,12 @@ ; RUN: FileCheck %s --check-prefixes=ZBB,RV32ZBB ; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | \ ; RUN: FileCheck %s --check-prefixes=ZBB,RV64ZBB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s | \ +; RUN: FileCheck %s --check-prefixes=XQCI +; RUN: llc < %s -mtriple=riscv32 -mattr=+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV32I-SFB +; RUN: llc < %s -mtriple=riscv64 -mattr=+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV64I-SFB ; Basic tests. 
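The new RUN configurations exercise two alternative select-lowering strategies for the min/max intrinsics: the Xqci conditional-move extensions (`qc.mvge` and friends) and plain short-forward-branch lowering, which replaces a branchless sequence with a conditional branch over a single `mv`. A schematic of the SFB shape, distilled from the checks below (hedged sketch; the function and label names are illustrative):

```llvm
; With +short-forward-branch-opt, smax lowers to roughly:
;     blt a1, a0, .Lskip   ; keep a0 when a0 > a1
;     mv  a0, a1           ; otherwise take b
; .Lskip:
;     ret
define i32 @smax_example(i32 signext %a, i32 signext %b) {
  %c = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  ret i32 %c
}
declare i32 @llvm.smax.i32(i32, i32)
```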
@@ -23,6 +29,27 @@ define signext i8 @smax_i8(i8 signext %a, i8 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a1, a0, .LBB0_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB0_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB0_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB0_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.smax.i8(i8 %a, i8 %b) ret i8 %c } @@ -42,6 +69,27 @@ define signext i16 @smax_i16(i16 signext %a, i16 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a1, a0, .LBB1_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB1_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB1_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB1_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.smax.i16(i16 %a, i16 %b) ret i16 %c } @@ -61,6 +109,27 @@ define signext i32 @smax_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a1, a0, .LBB2_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB2_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB2_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB2_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 %a, i32 %b) ret i32 %c } @@ -112,6 +181,41 @@ define i64 @smax_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: max a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a2, a0 +; XQCI-NEXT: slt a5, a3, a1 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a2, a0 +; RV32I-SFB-NEXT: slt a5, a3, a1 +; RV32I-SFB-NEXT: bne a1, a3, .LBB3_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB3_2: +; RV32I-SFB-NEXT: bnez a5, .LBB3_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB3_4: +; RV32I-SFB-NEXT: bnez a5, .LBB3_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB3_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB3_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB3_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.smax.i64(i64 %a, i64 %b) ret i64 %c } @@ -131,6 +235,27 @@ define signext i8 @smin_i8(i8 signext %a, i8 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i8: +; RV32I-SFB: # %bb.0: +; 
RV32I-SFB-NEXT: blt a0, a1, .LBB4_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB4_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB4_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB4_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.smin.i8(i8 %a, i8 %b) ret i8 %c } @@ -150,6 +275,27 @@ define signext i16 @smin_i16(i16 signext %a, i16 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a0, a1, .LBB5_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB5_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB5_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB5_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.smin.i16(i16 %a, i16 %b) ret i16 %c } @@ -169,6 +315,27 @@ define signext i32 @smin_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a0, a1, .LBB6_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB6_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB6_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB6_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 %a, i32 %b) ret i32 %c } @@ -220,6 +387,41 @@ define i64 @smin_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: min a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a0, a2 +; XQCI-NEXT: slt a5, a1, a3 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a0, a2 +; RV32I-SFB-NEXT: slt a5, a1, a3 +; RV32I-SFB-NEXT: bne a1, a3, .LBB7_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB7_2: +; RV32I-SFB-NEXT: bnez a5, .LBB7_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB7_4: +; RV32I-SFB-NEXT: bnez a5, .LBB7_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB7_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB7_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB7_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.smin.i64(i64 %a, i64 %b) ret i64 %c } @@ -239,6 +441,27 @@ define i8 @umax_i8(i8 zeroext %a, i8 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: maxu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a1, a0, .LBB8_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB8_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB8_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; 
RV64I-SFB-NEXT: .LBB8_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.umax.i8(i8 %a, i8 %b) ret i8 %c } @@ -258,6 +481,27 @@ define i16 @umax_i16(i16 zeroext %a, i16 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: maxu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a1, a0, .LBB9_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB9_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB9_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB9_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.umax.i16(i16 %a, i16 %b) ret i16 %c } @@ -277,6 +521,27 @@ define signext i32 @umax_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: maxu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a1, a0, .LBB10_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB10_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB10_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB10_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umax.i32(i32 %a, i32 %b) ret i32 %c } @@ -328,6 +593,41 @@ define i64 @umax_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: maxu a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a2, a0 +; XQCI-NEXT: sltu a5, a3, a1 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a2, a0 +; RV32I-SFB-NEXT: sltu a5, a3, a1 +; RV32I-SFB-NEXT: bne a1, a3, .LBB11_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB11_2: +; RV32I-SFB-NEXT: bnez a5, .LBB11_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB11_4: +; RV32I-SFB-NEXT: bnez a5, .LBB11_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB11_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB11_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB11_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.umax.i64(i64 %a, i64 %b) ret i64 %c } @@ -347,6 +647,27 @@ define zeroext i8 @umin_i8(i8 zeroext %a, i8 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: minu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a0, a1, .LBB12_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB12_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB12_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB12_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.umin.i8(i8 %a, i8 %b) ret i8 %c } @@ -366,6 +687,27 @@ define zeroext i16 @umin_i16(i16 zeroext %a, i16 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: minu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i16: +; 
XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a0, a1, .LBB13_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB13_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB13_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB13_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.umin.i16(i16 %a, i16 %b) ret i16 %c } @@ -385,6 +727,27 @@ define signext i32 @umin_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: minu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a0, a1, .LBB14_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB14_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB14_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB14_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umin.i32(i32 %a, i32 %b) ret i32 %c } @@ -436,6 +799,41 @@ define i64 @umin_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: minu a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a0, a2 +; XQCI-NEXT: sltu a5, a1, a3 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a0, a2 +; RV32I-SFB-NEXT: sltu a5, a1, a3 +; RV32I-SFB-NEXT: bne a1, a3, .LBB15_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB15_2: +; RV32I-SFB-NEXT: bnez a5, .LBB15_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB15_4: +; RV32I-SFB-NEXT: bnez a5, .LBB15_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB15_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB15_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB15_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.umin.i64(i64 %a, i64 %b) ret i64 %c } @@ -450,6 +848,18 @@ define signext i32 @smin_same_op_i32(i32 signext %a) { ; ZBB-LABEL: smin_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 %a, i32 %a) ret i32 %c } @@ -462,6 +872,18 @@ define signext i32 @smax_same_op_i32(i32 signext %a) { ; ZBB-LABEL: smax_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 %a, i32 %a) ret i32 %c } @@ -474,6 +896,18 @@ define signext i32 @umin_same_op_i32(i32 signext %a) { ; ZBB-LABEL: umin_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_same_op_i32: +; 
RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umin.i32(i32 %a, i32 %a) ret i32 %c } @@ -486,6 +920,18 @@ define signext i32 @umax_same_op_i32(i32 signext %a) { ; ZBB-LABEL: umax_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umax.i32(i32 %a, i32 %a) ret i32 %c } @@ -510,6 +956,19 @@ define signext i32 @smin_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smin_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 undef, i32 undef) ret i32 %c } @@ -532,6 +991,19 @@ define signext i32 @smax_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smax_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 undef, i32 undef) ret i32 %c } @@ -554,6 +1026,19 @@ define signext i32 @umin_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umin_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umin.i32(i32 undef, i32 undef) ret i32 %c } @@ -576,6 +1061,19 @@ define signext i32 @umax_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umax_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umax.i32(i32 undef, i32 undef) ret i32 %c } @@ -595,6 +1093,29 @@ define signext i32 @smax_i32_pos_constant(i32 signext %a) { ; ZBB-NEXT: li a1, 10 ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i32_pos_constant: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.lilti a0, a0, 11, 10 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i32_pos_constant: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a1, 10 +; RV32I-SFB-NEXT: blt a1, a0, .LBB24_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB24_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i32_pos_constant: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, 10 +; RV64I-SFB-NEXT: blt a1, a0, .LBB24_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB24_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 %a, i32 10) ret i32 %c } @@ -616,6 +1137,33 @@ define signext i32 @smax_i32_pos_constant_trailing_zeros(i32 signext %a) { ; ZBB-NEXT: li a1, 16 ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i32_pos_constant_trailing_zeros: +; XQCI: # %bb.0: +; XQCI-NEXT: andi a1, a0, -8 +; XQCI-NEXT: 
li a0, 16 +; XQCI-NEXT: qc.mvlt a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i32_pos_constant_trailing_zeros: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: andi a1, a0, -8 +; RV32I-SFB-NEXT: li a0, 16 +; RV32I-SFB-NEXT: bge a0, a1, .LBB25_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB25_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i32_pos_constant_trailing_zeros: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: andi a1, a0, -8 +; RV64I-SFB-NEXT: li a0, 16 +; RV64I-SFB-NEXT: bge a0, a1, .LBB25_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB25_2: +; RV64I-SFB-NEXT: ret %b = and i32 %a, -8 %c = call i32 @llvm.smax.i32(i32 %b, i32 16) %d = and i32 %c, -4 @@ -635,6 +1183,29 @@ define signext i32 @smin_i32_negone(i32 signext %a) { ; ZBB-NEXT: li a1, -1 ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i32_negone: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.ligei a0, a0, 0, -1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i32_negone: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a1, -1 +; RV32I-SFB-NEXT: bltz a0, .LBB26_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB26_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i32_negone: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, -1 +; RV64I-SFB-NEXT: bltz a0, .LBB26_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB26_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 %a, i32 -1) ret i32 %c } @@ -672,6 +1243,34 @@ define i64 @smin_i64_negone(i64 %a) { ; RV64ZBB-NEXT: li a1, -1 ; RV64ZBB-NEXT: min a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i64_negone: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.ligei a0, a1, 0, -1 +; XQCI-NEXT: qc.ligei a1, a1, 0, -1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i64_negone: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a2, -1 +; RV32I-SFB-NEXT: bltz a1, .LBB27_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB27_2: +; RV32I-SFB-NEXT: bltz a1, .LBB27_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a1, a2 +; RV32I-SFB-NEXT: .LBB27_4: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i64_negone: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, -1 +; RV64I-SFB-NEXT: bltz a0, .LBB27_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB27_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.smin.i64(i64 %a, i64 -1) ret i64 %c } @@ -720,6 +1319,41 @@ define i64 @umax_i64_one(i64 %a, i64 %b) { ; RV64ZBB-NEXT: li a1, 1 ; RV64ZBB-NEXT: maxu a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i64_one: +; XQCI: # %bb.0: +; XQCI-NEXT: mv a2, a1 +; XQCI-NEXT: qc.selectinei a2, 0, a0, 1 +; XQCI-NEXT: qc.liltui a0, a0, 2, 1 +; XQCI-NEXT: qc.mvnei a0, a1, 0, a2 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i64_one: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a2, 1 +; RV32I-SFB-NEXT: li a3, 1 +; RV32I-SFB-NEXT: beqz a1, .LBB28_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a3, a0 +; RV32I-SFB-NEXT: .LBB28_2: +; RV32I-SFB-NEXT: bnez a0, .LBB28_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB28_4: +; RV32I-SFB-NEXT: beqz a1, .LBB28_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a0, a3 +; RV32I-SFB-NEXT: .LBB28_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i64_one: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, 1 +; RV64I-SFB-NEXT: bnez a0, .LBB28_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB28_2: +; RV64I-SFB-NEXT: 
ret %c = call i64 @llvm.umax.i64(i64 %a, i64 1) ret i64 %c } diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll new file mode 100644 index 0000000..d9a49a1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v -O0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV64 + +define void @matmul() { +; CHECK-RV64-LABEL: matmul: +; CHECK-RV64: # %bb.0: # %entry +; CHECK-RV64-NEXT: li a0, 0 +; CHECK-RV64-NEXT: vsetvli zero, a0, 512 +; CHECK-RV64-NEXT: sf.vsettm zero, a0 +; CHECK-RV64-NEXT: sf.vtzero.t mt0 +; CHECK-RV64-NEXT: ret +entry: + call void @llvm.riscv.sf.vtzero.t.i64(i64 0, i64 0, i64 0, i64 3, i64 1) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare void @llvm.riscv.sf.vtzero.t.i64(i64 immarg, i64, i64, i64 immarg, i64 immarg) #0 diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll new file mode 100644 index 0000000..9b9a849 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e4m3.e4m3 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll new file mode 100644 index 0000000..b63974f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e4m3.e5m2 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void 
@llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll new file mode 100644 index 0000000..62d629b1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e5m2.e4m3 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll new file mode 100644 index 0000000..7a90c97 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e5m2.e5m2 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll new file mode 100644 index 0000000..29451c6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void 
@llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen, <vscale x 32 x half>, <vscale x 32 x half>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w2_f16m8(iXLen %mtd, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w2_f16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w2 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen 0, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 2) + ret void +} + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen, <vscale x 16 x float>, <vscale x 16 x float>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w1_f32m8(iXLen %mtd, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w1_f32m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen 0, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1) + ret void +} + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen, <vscale x 8 x double>, <vscale x 8 x double>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w1_f64m8(iXLen %mtd, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w1_f64m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen 0, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll new file mode 100644 index 0000000..6a4b29f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_s_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_s_s_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.s.s mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll new file mode 100644 index 0000000..79239b0 --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_s_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_s_u_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.s.u mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll new file mode 100644 index 0000000..b0d039b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_u_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_u_s_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.u.s mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll new file mode 100644 index 0000000..913c277 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_u_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_u_u_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; 
CHECK-NEXT: sf.mm.u.u mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll new file mode 100644 index 0000000..8048dec --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte16.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte16(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1 +; CHECK-NEXT: sf.vlte16 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte16.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll new file mode 100644 index 0000000..a526dc8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte32.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte32(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vlte32 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte32.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll new file mode 100644 index 0000000..ed0c48a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: 
-verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte64.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte64(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vlte64 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte64.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll new file mode 100644 index 0000000..67b3ed2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte8.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte8(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1 +; CHECK-NEXT: sf.vlte8 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte8.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll new file mode 100644 index 0000000..4da37fa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettk(iXLen %tk) { +; CHECK-LABEL: test_sf_vsettk: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a1, zero, e16, w2 +; CHECK-NEXT: sf.vsettk a0, a0 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen %tk, iXLen 1, iXLen 2) + ret iXLen %0 +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll new file mode 100644 index 0000000..143c26c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettm(iXLen %tm) { +; CHECK-LABEL: test_sf_vsettm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a1, zero, e8, w4 +; CHECK-NEXT: sf.vsettm a0, a0 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen %tm, iXLen 0, iXLen 3) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll new file mode 100644 index 0000000..48fa1bc8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettnt_e8w1(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w1 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 1) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e8w2(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w2 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 2) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e8w4(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w4 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 3) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w1(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w1 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 1) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w2(iXLen 
%tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w2 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 2) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w4(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w4 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 3) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll new file mode 100644 index 0000000..7a76151 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste16.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste16(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1 +; CHECK-NEXT: sf.vste16 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste16.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll new file mode 100644 index 0000000..8ff6e6a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste32.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste32(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vste32 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste32.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll new file mode 100644 index 0000000..53990e4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste64.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste64(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vste64 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste64.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll new file mode 100644 index 0000000..09b7259 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste8.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste8(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1 +; CHECK-NEXT: sf.vste8 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste8.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll new file mode 100644 index 0000000..394eb60 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtdiscard() + +define dso_local void @test_sf_vtdiscard() { +; CHECK-LABEL: 
test_sf_vtdiscard: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vtdiscard +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtdiscard() + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll new file mode 100644 index 0000000..66c9d26 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen, <vscale x 32 x bfloat>, iXLen) + +define void @test_sf_vtmv_t_v_bf16m8(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_bf16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen, <vscale x 32 x half>, iXLen) + +define void @test_sf_vtmv_t_v_f16(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen, <vscale x 16 x float>, iXLen) + +define void @test_sf_vtmv_t_v_f32(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen, <vscale x 8 x double>, iXLen) + +define void @test_sf_vtmv_t_v_f64(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen, <vscale x 64 x i8>, iXLen) + +define void @test_sf_vtmv_t_v_i8(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen, <vscale x 32 x i16>, iXLen) + +define void 
@test_sf_vtmv_t_v_i16(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen, <vscale x 16 x i32>, iXLen) + +define void @test_sf_vtmv_t_v_i32(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen, <vscale x 8 x i64>, iXLen) + +define void @test_sf_vtmv_t_v_i64(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll new file mode 100644 index 0000000..0dcc2ab --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen, iXLen) + +define <vscale x 32 x bfloat> @test_sf_vtmv_v_t_bf16m8(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_bf16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x bfloat> %0 +} + +declare <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen, iXLen) + +define <vscale x 32 x half> @test_sf_vtmv_v_t_f16(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x half> %0 +} + +declare <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen, iXLen) + +define <vscale x 16 x float> @test_sf_vtmv_v_t_f32(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 16 x float> 
@llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 16 x float> %0 +} + +declare <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen, iXLen) + +define <vscale x 8 x double> @test_sf_vtmv_v_t_f64(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 8 x double> %0 +} + +declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen, iXLen) + +define <vscale x 64 x i8> @test_sf_vtmv_v_t_i8(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 64 x i8> %0 +} + +declare <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen, iXLen) + +define <vscale x 32 x i16> @test_sf_vtmv_v_t_i16(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x i16> %0 +} + +declare <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen, iXLen) + +define <vscale x 16 x i32> @test_sf_vtmv_v_t_i32(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 16 x i32> %0 +} + +declare <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen, iXLen) + +define <vscale x 8 x i64> @test_sf_vtmv_v_t_i64(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 8 x i64> %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll new file mode 100644 index 0000000..bbccb02 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtzero.t.iXLen(iXLen, iXLen, iXLen, iXLen, iXLen) +define void @test_sf_vtzero_t(iXLen %tm, iXLen %tn) { +; CHECK-LABEL: test_sf_vtzero_t: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a0 +; CHECK-NEXT: sf.vtzero.t mt0 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtzero.t.iXLen(iXLen 0, iXLen %tm, iXLen %tn, iXLen 3, iXLen 4) + ret void +} + diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll index 2f03ff9..318268a 100644 --- a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll +++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll @@ -15,8 +15,7 @@ define i32 @from_cmpeq(i32 %xx, i32 %y) { ; ; RV64I-LABEL: from_cmpeq: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -9 +; RV64I-NEXT: addiw a0, a0, -9 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret @@ -39,8 +38,7 @@ define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) { ; ; RV64I-LABEL: from_cmpeq_fail_bad_andmask: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -9 +; RV64I-NEXT: addiw a0, a0, -9 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll index fabb573..4e14893 100644 --- a/llvm/test/CodeGen/RISCV/setcc-logic.ll +++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll @@ -104,9 +104,8 @@ define i1 @and_icmps_const_not1bit_diff(i32 %x) nounwind { ; ; RV64I-LABEL: and_icmps_const_not1bit_diff: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a1, a0, -44 -; RV64I-NEXT: addi a0, a0, -92 +; RV64I-NEXT: addiw a1, a0, -44 +; RV64I-NEXT: addiw a0, a0, -92 ; RV64I-NEXT: snez a1, a1 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: and a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index bdbe4ed..07bfbe6 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -674,8 +674,7 @@ define i32 @sext_of_not_cmp_i32(i32 %x) { ; ; RV64-LABEL: sext_of_not_cmp_i32: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: addiw a0, a0, -7 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: ret @@ -718,8 +717,7 @@ define i32 @dec_of_zexted_cmp_i32(i32 %x) { ; ; RV64-LABEL: dec_of_zexted_cmp_i32: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: addiw a0, a0, -7 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index 2751332c..bf6802d 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -1047,8 +1047,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) { ; RV64-LABEL: usubo.i32.constant.lhs: ; RV64: # %bb.0: # %entry ; RV64-NEXT: li a2, -2 -; RV64-NEXT: subw a2, a2, a0 -; RV64-NEXT: addi a0, a2, 1 +; RV64-NEXT: sub a2, a2, a0 +; RV64-NEXT: addiw a0, a2, 1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: ret @@ -1065,8 +1065,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) { ; RV64ZBA-LABEL: usubo.i32.constant.lhs: ; RV64ZBA: # %bb.0: # %entry ; RV64ZBA-NEXT: li a2, -2 -; RV64ZBA-NEXT: subw a2, a2, a0 -; RV64ZBA-NEXT: addi a0, a2, 1 +; RV64ZBA-NEXT: sub a2, a2, a0 +; RV64ZBA-NEXT: addiw a0, a2, 1 ; RV64ZBA-NEXT: seqz a0, a0 ; RV64ZBA-NEXT: sw a2, 0(a1) ; RV64ZBA-NEXT: ret @@ -1083,8 +1083,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) { ; RV64ZICOND-LABEL: 
usubo.i32.constant.lhs: ; RV64ZICOND: # %bb.0: # %entry ; RV64ZICOND-NEXT: li a2, -2 -; RV64ZICOND-NEXT: subw a2, a2, a0 -; RV64ZICOND-NEXT: addi a0, a2, 1 +; RV64ZICOND-NEXT: sub a2, a2, a0 +; RV64ZICOND-NEXT: addiw a0, a2, 1 ; RV64ZICOND-NEXT: seqz a0, a0 ; RV64ZICOND-NEXT: sw a2, 0(a1) ; RV64ZICOND-NEXT: ret diff --git a/llvm/test/CodeGen/SPIRV/llvm-compiler-used.ll b/llvm/test/CodeGen/SPIRV/llvm-compiler-used.ll new file mode 100644 index 0000000..ddc2585 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-compiler-used.ll @@ -0,0 +1,19 @@ +; RUN: llc -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: llc -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; Verify that llvm.compiler.used is not lowered. +; CHECK: OpName %{{[0-9]+}} "unused" +; CHECK-NOT: OpName %{{[0-9]+}} "llvm.compiler.used" + +; Check that the type of llvm.compiler.used is not emitted too. +; CHECK-NOT: OpTypeArray + +@unused = private addrspace(3) global i32 0 +@llvm.compiler.used = appending addrspace(2) global [1 x ptr addrspace (4)] [ptr addrspace(4) addrspacecast (ptr addrspace(3) @unused to ptr addrspace(4))] + +define spir_func void @foo() { +entry: + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll index c6ee804..07fbed9 100644 --- a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll @@ -90,7 +90,7 @@ define i32 @test_tbegin_nofloat4(i32 %pad, ptr %ptr) { ; CHECK: tbegin 0, 65292 ; CHECK: ipm %r2 ; CHECK: srl %r2, 28 -; CHECK: ciblh %r2, 2, 0(%r14) +; CHECK: bnhr %r14 ; CHECK: mvhi 0(%r3), 0 ; CHECK: br %r14 %res = call i32 @llvm.s390.tbegin.nofloat(ptr null, i32 65292) @@ -219,7 +219,7 @@ define i32 @test_tend2(i32 %pad, ptr %ptr) { ; CHECK: tend ; CHECK: ipm %r2 ; CHECK: srl %r2, 28 -; CHECK: ciblh %r2, 2, 0(%r14) +; CHECK: bnhr %r14 ; CHECK: mvhi 0(%r3), 0 ; CHECK: br %r14 %res = call i32 @llvm.s390.tend() diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll new file mode 100644 index 0000000..6b8746e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll @@ -0,0 +1,738 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s +; Test implementation of combining br_ccmask for flag output operand, and +; optimizing ipm sequence using conditional branches. + +declare void @dummy() + +; Check a case where the cc is used as an integer. +; Just (srl (ipm)) sequence without optimization. +define i32 @test(ptr %a) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: srl %r2, 28 +; CHECK-NEXT: br %r14 + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + ret i32 %cc +} + +; Test-1(f1_0_*). Test all 14 valid combinations, where cc is being used for +; branching. + +; Check (cc == 0). 
+define void @f1_0_eq_0(ptr %a) { +; CHECK-LABEL: f1_0_eq_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jge dummy@PLT +; CHECK-NEXT: .LBB1_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 0). +define void @f1_0_ne_0(ptr %a) { +; CHECK-LABEL: f1_0_ne_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgne dummy@PLT +; CHECK-NEXT: .LBB2_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1). +define void @f1_0_eq_1(ptr %a) { +; CHECK-LABEL: f1_0_eq_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgl dummy@PLT +; CHECK-NEXT: .LBB3_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 1). +define void @f1_0_ne_1(ptr %a) { +; CHECK-LABEL: f1_0_ne_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB4_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ne i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 2). +define void @f1_0_eq_2(ptr %a) { +; CHECK-LABEL: f1_0_eq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgh dummy@PLT +; CHECK-NEXT: .LBB5_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 2). 
+define void @f1_0_ne_2(ptr %a) { +; CHECK-LABEL: f1_0_ne_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB6_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ne i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 3). +define void @f1_0_eq_3(ptr %a) { +; CHECK-LABEL: f1_0_eq_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgo dummy@PLT +; CHECK-NEXT: .LBB7_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 3). +define void @f1_0_ne_3(ptr %a) { +; CHECK-LABEL: f1_0_ne_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgno dummy@PLT +; CHECK-NEXT: .LBB8_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ult i32 %cc, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|1). +define void @f1_0_01(ptr %a) { +; CHECK-LABEL: f1_0_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgle dummy@PLT +; CHECK-NEXT: .LBB9_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ult i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|2). +define void @f1_0_02(ptr %a) { +; CHECK-LABEL: f1_0_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jghe dummy@PLT +; CHECK-NEXT: .LBB10_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|3). 
+define void @f1_0_03(ptr %a) { +; CHECK-LABEL: f1_0_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnlh dummy@PLT +; CHECK-NEXT: .LBB11_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp0 = icmp ne i32 %cc, 0 + %cmp3 = icmp ne i32 %cc, 3 + %cmp.inv = and i1 %cmp0, %cmp3 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1|2). +define void @f1_0_12(ptr %a) { +; CHECK-LABEL: f1_0_12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB12_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq1 = icmp eq i32 %cc, 1 + %cmpeq2 = icmp eq i32 %cc, 2 + %cmp = or i1 %cmpeq1, %cmpeq2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1|3). +define void @f1_0_13(ptr %a) { +; CHECK-LABEL: f1_0_13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnhe dummy@PLT +; CHECK-NEXT: .LBB13_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq1 = icmp eq i32 %cc, 1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cmp = or i1 %cmpeq1, %cmpeq3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 2|3). +define void @f1_0_23(ptr %a) { +; CHECK-LABEL: f1_0_23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnle dummy@PLT +; CHECK-NEXT: .LBB14_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ugt i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Test-2(f1_1_*/f1_2_*/f1_3_*/f1_4_*). +; Test Mixed patterns involving Binary Ops. + +; Check 'add' for (cc != 0). +define void @f1_1_1(ptr %a) { +; CHECK-LABEL: f1_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgne dummy@PLT +; CHECK-NEXT: .LBB15_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cmp = icmp ult i32 %add, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'add' for (cc == 1|2).
+define void @f1_1_2(ptr %a) { +; CHECK-LABEL: f1_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB16_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cmp = icmp ult i32 %add, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'add' for (cc == 1|2). +define void @f1_1_3(ptr %a) { +; CHECK-LABEL: f1_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB17_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cmp.inv = icmp ult i32 %add, -2 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define void @f1_2_1(ptr %a) { +; CHECK-LABEL: f1_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB18_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp.inv = and i1 %cmpne3, %cmpne0 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define void @f1_2_2(ptr %a) { +; CHECK-LABEL: f1_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB19_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %and.cond.inv = and i1 %ugt1, %cmpne3 + br i1 %and.cond.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and/tm' for (cc == 0|2). 
+define void @f1_2_3(ptr %a) { +; CHECK-LABEL: f1_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jghe dummy@PLT +; CHECK-NEXT: .LBB20_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and/tm' for (cc == 1|3). +define void @f1_2_4(ptr %a) { +; CHECK-LABEL: f1_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnhe dummy@PLT +; CHECK-NEXT: .LBB21_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define void @f1_2_5(ptr %a) { +; CHECK-LABEL: f1_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB22_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp = xor i1 %cmpne3, %trunc + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define void @f1_3_1(ptr %a) { +; CHECK-LABEL: f1_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB23_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp.inv = xor i1 %cmpne3, %xor + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). 
+define void @f1_3_2(ptr %a) {
+; CHECK-LABEL: f1_3_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnl dummy@PLT
+; CHECK-NEXT: .LBB24_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp.inv = xor i1 %cmpeq3, %trunc
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc !=2).
+define void @f1_3_3(ptr %a) {
+; CHECK-LABEL: f1_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnh dummy@PLT
+; CHECK-NEXT: .LBB25_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cmp.cond.inv = xor i1 %cmpne0, %trunc
+ br i1 %cmp.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' with both operands select_ccmask, one with TM and the other with
+; ICMP(cc == 1).
+define void @f1_4_1(ptr %a) {
+; CHECK-LABEL: f1_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgl dummy@PLT
+; CHECK-NEXT: .LBB26_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp.cond.inv = or i1 %cmpeq3, %cmpeq0
+ br i1 %cmp.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' for (cc == 0|1).
+define void @f1_4_2(ptr %a) {
+; CHECK-LABEL: f1_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgle dummy@PLT
+; CHECK-NEXT: .LBB27_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cmp.inv = icmp samesign ugt i32 %or, -3
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' for (cc == 0|1).
+define void @f1_4_3(ptr %a) { +; CHECK-LABEL: f1_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgle dummy@PLT +; CHECK-NEXT: .LBB28_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cmp = icmp samesign ult i32 %or, -2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll new file mode 100644 index 0000000..b9b9a4b --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll @@ -0,0 +1,1665 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s +; Test implementation of combining select_ccmask for flag output operand and +; optimizing ipm sequence using conditional branches. + +; Test-1(f2_0_*): Both TrueVal and FalseVal non-const(14-valid CCMask). + +; Check (cc == 0). +define i64 @f2_0_eq_0(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 0). +define i64 @f2_0_ne_0(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ugt i32 %cc, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 1). +define i64 @f2_0_eq_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 1). 
+define i64 @f2_0_ne_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB3_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ne i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 2). +define i64 @f2_0_eq_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB4_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 2). +define i64 @f2_0_ne_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnhr %r14 +; CHECK-NEXT: .LBB5_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ne i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 3). +define i64 @f2_0_eq_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bor %r14 +; CHECK-NEXT: .LBB6_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 3). +define i64 @f2_0_ne_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnor %r14 +; CHECK-NEXT: .LBB7_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ult i32 %cc, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|1). 
+define i64 @f2_0_01(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB8_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ult i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|2). +define i64 @f2_0_02(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB9_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|3). +define i64 @f2_0_03(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB10_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cmp0 = icmp ne i32 %cc, 0 + %cmp3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmp0, %cmp3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 1|2). +define i64 @f2_0_12(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB11_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 1|3). +define i64 @f2_0_13(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB12_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 2|3). 
+define i64 @f2_0_23(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB13_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ugt i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Test-2(f2_1_*/f2_2_*/f2_3_*/f2_4_*). +; Both TrueVal and FalseVal are non-const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f2_1_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB14_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f2_1_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB15_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f2_1_3(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB16_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f2_2_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB17_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). 
+define i64 @f2_2_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB18_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f2_2_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB19_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f2_2_4(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB20_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f2_2_5(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB21_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). 
+define i64 @f2_3_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB22_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f2_3_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB23_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f2_3_3(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB24_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f2_4_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB25_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). 
+define i64 @f2_4_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB26_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f2_4_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB27_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Test-3(f3_1_*/f3_2_*/f3_3_*/f3_4_*). +; TrueVal is non-const and FalseVal is const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f3_1_1(i64 %x, ptr %a) { +; CHECK-LABEL: f3_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB28_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f3_1_2(i64 %x, ptr %a) { +; CHECK-LABEL: f3_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB29_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f3_1_3(ptr %a, i64 %x) { +; CHECK-LABEL: f3_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB30_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). 
+define i64 @f3_2_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB31_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f3_2_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB32_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f3_2_3(i64 %x, ptr %a) { +; CHECK-LABEL: f3_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB33_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f3_2_4(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB34_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). 
+define i64 @f3_2_5(i64 %x, ptr %a) { +; CHECK-LABEL: f3_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB35_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f3_3_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB36_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f3_3_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB37_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f3_3_3(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB38_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). 
+define i64 @f3_4_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB39_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f3_4_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB40_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f3_4_3(i64 %x, ptr %a) { +; CHECK-LABEL: f3_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB41_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + + +; Test-4(f4_1_*/f4_2_*/f4_3_*/f4_4_*). +; TrueVal is const and FalseVal is non-const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f4_1_1(ptr %a, i64 %y) { +; CHECK-LABEL: f4_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB42_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). 
+define i64 @f4_1_2(ptr %a, i64 %y) { +; CHECK-LABEL: f4_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB43_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f4_1_3(i64 %y, ptr %a) { +; CHECK-LABEL: f4_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB44_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f4_2_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB45_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f4_2_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB46_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f4_2_3(ptr %a, i64 %y) { +; CHECK-LABEL: f4_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB47_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). 
+define i64 @f4_2_4(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB48_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f4_2_5(ptr %a, i64 %y) { +; CHECK-LABEL: f4_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB49_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f4_3_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB50_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f4_3_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB51_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). 
+define i64 @f4_3_3(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB52_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cond.inv = xor i1 %cmpne0, %trunc
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1).
+define i64 @f4_4_1(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB53_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = or i1 %cmpeq3, %cmpeq0
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f4_4_2(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB54_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond.inv = icmp samesign ugt i32 %or, -3
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f4_4_3(ptr %a, i64 %y) {
+; CHECK-LABEL: f4_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB55_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond = icmp samesign ult i32 %or, -2
+ %res = select i1 %cond, i64 15, i64 %y
+ ret i64 %res
+}
+
+; Test-5(f5_1_*/f5_2_*/f5_3_*/f5_4_*).
+; Both TrueVal and FalseVal are const with mixed patterns involving
+; Binary Ops.
+
+
+; Check 'add' for (cc != 0).
+define i64 @f5_1_1(ptr %a) { +; CHECK-LABEL: f5_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB56_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f5_1_2(ptr %a) { +; CHECK-LABEL: f5_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB57_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f5_1_3(ptr %a) { +; CHECK-LABEL: f5_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB58_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f5_2_1(ptr %a) { +; CHECK-LABEL: f5_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB59_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f5_2_2(ptr %a) { +; CHECK-LABEL: f5_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB60_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). 
+define i64 @f5_2_3(ptr %a) { +; CHECK-LABEL: f5_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB61_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f5_2_4(ptr %a) { +; CHECK-LABEL: f5_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB62_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f5_2_5(ptr %a) { +; CHECK-LABEL: f5_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB63_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f5_3_1(ptr %a) { +; CHECK-LABEL: f5_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB64_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). 
+define i64 @f5_3_2(ptr %a) { +; CHECK-LABEL: f5_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB65_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f5_3_3(ptr %a) { +; CHECK-LABEL: f5_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB66_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f5_4_1(ptr %a) { +; CHECK-LABEL: f5_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB67_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f5_4_2(ptr %a) { +; CHECK-LABEL: f5_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB68_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). 
+define i64 @f5_4_3(ptr %a) { +; CHECK-LABEL: f5_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB69_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Nested select_ccmask with TrueVal and FalseVal swapped with each other. +define i64 @f6_1(ptr %a) { +; CHECK-LABEL: f6_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB70_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %select = select i1 %cmpeq3, i64 5, i64 15 + %res = select i1 %cmpeq0, i64 %select, i64 5 + ret i64 %res +} + diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll new file mode 100644 index 0000000..abbd953 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=mvp -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +; This test ensures that loads and stores generated for small memcpy et al use +; constant offset folding. 
+ + +target triple = "wasm32-unknown-unknown" + +define void @call_memset(ptr) #0 { +; CHECK-LABEL: call_memset: +; CHECK: .functype call_memset (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0 +; CHECK-NEXT: i64.const $push1=, 0 +; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1 +; CHECK-NEXT: # fallthrough-return + call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false) + ret void +} + +define void @call_memcpy(ptr %dst, ptr %src) #0 { +; CHECK-LABEL: call_memcpy: +; CHECK: .functype call_memcpy (i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0 +; CHECK-NEXT: i64.load $push1=, 0($1):p2align=0 +; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1 +; CHECK-NEXT: # fallthrough-return + call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false) + ret void +} + + +define void @call_memmove(ptr %dst, ptr %src) #0 { +; CHECK-LABEL: call_memmove: +; CHECK: .functype call_memmove (i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $2=, 0($1):p2align=0 +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0 +; CHECK-NEXT: i64.store 0($0):p2align=0, $2 +; CHECK-NEXT: # fallthrough-return + call void @llvm.memmove.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false) + ret void +} diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll index 28b4541..7bdc4e1 100644 --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: callq __ubyte_convert_to_ctype ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_6 +; CHECK-NEXT: js LBB0_4 ; CHECK-NEXT: ## %bb.1: ## %cond_next.i ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: movq %rbx, %rdi @@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: cmpl $-2, %ecx -; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: je LBB0_8 ; CHECK-NEXT: ## %bb.2: ## %cond_next.i ; CHECK-NEXT: cmpl $-1, %ecx -; CHECK-NEXT: jne LBB0_3 -; CHECK-NEXT: LBB0_8: ## %bb4 +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: LBB0_3: ## %bb4 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 16(%rax), %rax -; CHECK-NEXT: jmp LBB0_9 -; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: jmp LBB0_10 +; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-2, %eax -; CHECK-NEXT: je LBB0_10 -; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit -; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: je LBB0_8 -; CHECK-NEXT: LBB0_3: ## %bb35 +; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: LBB0_6: ## %bb35 ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14 ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *216(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.12: ## %cond_false.i -; CHECK-NEXT: setne %dil +; CHECK-NEXT: je LBB0_11 +; CHECK-NEXT: ## %bb.7: ## %cond_false.i ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb 
%dl ; CHECK-NEXT: movl %eax, %r15d ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %dil, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: LBB0_13: ## %cond_true.i200 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_15 -; CHECK-NEXT: ## %bb.14: ## %cond_true14.i -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: jmp LBB0_16 -; CHECK-NEXT: LBB0_10: ## %bb17 +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: jmp LBB0_14 +; CHECK-NEXT: LBB0_8: ## %bb17 ; CHECK-NEXT: callq _PyErr_Occurred ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.11: ## %cond_next +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.9: ## %cond_next ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 80(%rax), %rax -; CHECK-NEXT: LBB0_9: ## %bb4 +; CHECK-NEXT: LBB0_10: ## %bb4 ; CHECK-NEXT: movq 96(%rax), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq *40(%rax) -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_4: ## %cond_true.i +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_11: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: testb %sil, %sil -; CHECK-NEXT: sete %al +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: LBB0_12: ## %cond_false.i ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: sete %cl -; CHECK-NEXT: xorl %r15d, %r15d -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: jne LBB0_13 -; CHECK-NEXT: LBB0_5: ## %cond_next17.i +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.13: ## %cond_next17.i ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movzbl %ah, %ebx -; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: jmp LBB0_18 +; CHECK-NEXT: LBB0_14: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_17 +; CHECK-NEXT: ## %bb.16: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *224(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_19 -; CHECK-NEXT: ## %bb.17: ## %cond_true61 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.19: ## %cond_true61 ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi @@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: callq *200(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_23 -; CHECK-NEXT: ## %bb.18: ## %cond_next73 +; CHECK-NEXT: js LBB0_27 +; CHECK-NEXT: ## %bb.20: ## %cond_next73 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -149,13 +146,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: movl %ebp, %edx ; CHECK-NEXT: callq *232(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: LBB0_19: ## %cond_next89 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: LBB0_21: ## %cond_next89 ; CHECK-NEXT: movl $2, %edi ; CHECK-NEXT: callq _PyTuple_New ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_23 -; CHECK-NEXT: ## %bb.20: ## %cond_next97 +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## 
%bb.22: ## %cond_next97 ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12 ; CHECK-NEXT: movq (%r12), %rax @@ -163,8 +160,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.25: ## %cond_next135 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.23: ## %cond_next135 ; CHECK-NEXT: movb %r15b, 16(%rax) ; CHECK-NEXT: movq %rax, 24(%r14) ; CHECK-NEXT: movq (%r12), %rax @@ -172,22 +169,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.26: ## %cond_next182 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.24: ## %cond_next182 ; CHECK-NEXT: movb %bl, 16(%rax) ; CHECK-NEXT: movq %rax, 32(%r14) ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_21: ## %cond_true113 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_25: ## %cond_true113 ; CHECK-NEXT: decq (%r14) -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.22: ## %cond_true126 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.26: ## %cond_true126 ; CHECK-NEXT: movq 8(%r14), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq *48(%rax) -; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock ; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll index df04b67..c2b7068 100644 --- a/llvm/test/CodeGen/X86/isel-fpclass.ll +++ b/llvm/test/CodeGen/X86/isel-fpclass.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86,X86-SDAGISEL ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL ; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL ; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL -; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X64,X64-GISEL +; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X86,X86-GISEL +; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X64-GISEL define i1 @isnone_f(float %x) nounwind { ; X86-LABEL: isnone_f: @@ -23,6 +23,11 @@ define i1 @isnone_f(float %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: xorl %eax, %eax ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isnone_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) ret i1 %0 @@ -45,22 +50,27 @@ define i1 @isany_f(float %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: movb $1, %al ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isany_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movb $1, %al +; X64-GISEL-NEXT: retq 
entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) ret i1 %0 } define i1 @issignaling_f(float %x) nounwind { -; X86-LABEL: issignaling_f: -; X86: # %bb.0: -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setl %cl -; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; X86-NEXT: setge %al -; X86-NEXT: andb %cl, %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: issignaling_f: +; X86-SDAGISEL: # %bb.0: +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %cl +; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: andb %cl, %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: issignaling_f: ; X64: # %bb.0: @@ -87,18 +97,44 @@ define i1 @issignaling_f(float %x) nounwind { ; X86-FASTISEL-NEXT: andb %cl, %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: issignaling_f: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %dl +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: andb %dl, %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: issignaling_f: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %dl +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: andb %dl, %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" ret i1 %a0 } define i1 @isquiet_f(float %x) nounwind { -; X86-LABEL: isquiet_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setge %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isquiet_f: ; X64: # %bb.0: # %entry @@ -119,19 +155,39 @@ define i1 @issignaling_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setge %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isquiet_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setae %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isquiet_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setae %al 
+; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" ret i1 %0 } define i1 @not_isquiet_f(float %x) nounwind { -; X86-LABEL: not_isquiet_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setl %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isquiet_f: ; X64: # %bb.0: # %entry @@ -152,19 +208,57 @@ define i1 @not_isquiet_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setl %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isquiet_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %cl +; X86-GISEL-NEXT: orb %dl, %cl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %dl +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: andb %dl, %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isquiet_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %cl +; X64-GISEL-NEXT: orb %dl, %cl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %dl +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: andb %dl, %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" ret i1 %0 } define i1 @isinf_f(float %x) nounwind { -; X86-LABEL: isinf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isinf_f: ; X64: # %bb.0: # %entry @@ -185,19 +279,39 @@ define i1 @isinf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isinf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %al +; 
X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isinf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" ret i1 %0 } define i1 @not_isinf_f(float %x) nounwind { -; X86-LABEL: not_isinf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isinf_f: ; X64: # %bb.0: # %entry @@ -218,17 +332,43 @@ define i1 @not_isinf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setne %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isinf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %dl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isinf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" ret i1 %0 } define i1 @is_plus_inf_f(float %x) nounwind { -; X86-LABEL: is_plus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_plus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_plus_inf_f: ; X64: # %bb.0: # %entry @@ -246,17 +386,34 @@ define i1 @is_plus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_plus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_plus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float 
%x, i32 512) ; 0x200 = "+inf" ret i1 %0 } define i1 @is_minus_inf_f(float %x) nounwind { -; X86-LABEL: is_minus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_minus_inf_f: ; X64: # %bb.0: # %entry @@ -274,17 +431,34 @@ define i1 @is_minus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_minus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_minus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" ret i1 %0 } define i1 @not_is_minus_inf_f(float %x) nounwind { -; X86-LABEL: not_is_minus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_is_minus_inf_f: ; X64: # %bb.0: # %entry @@ -302,19 +476,55 @@ define i1 @not_is_minus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setne %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_is_minus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: pushl %ebx +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: movl %eax, %ecx +; X86-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %edx, %edx +; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %bl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %ah +; X86-GISEL-NEXT: orb %dl, %ah +; X86-GISEL-NEXT: orb %bl, %ah +; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %ah, %al +; X86-GISEL-NEXT: popl %ebx +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_is_minus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: movl %eax, %ecx +; X64-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %edx, %edx +; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %sil +; X64-GISEL-NEXT: orb %dl, %sil +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %dl +; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: orb %sil, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" ret i1 %0 } define i1 @isfinite_f(float %x) nounwind { -; X86-LABEL: isfinite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), 
%eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setl %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isfinite_f: ; X64: # %bb.0: # %entry @@ -335,19 +545,39 @@ define i1 @isfinite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setl %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isfinite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isfinite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" ret i1 %0 } define i1 @not_isfinite_f(float %x) nounwind { -; X86-LABEL: not_isfinite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setge %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isfinite_f: ; X64: # %bb.0: # %entry @@ -368,17 +598,43 @@ define i1 @not_isfinite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setge %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isfinite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %dl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isfinite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" ret i1 %0 } define i1 @is_plus_finite_f(float %x) nounwind { -; X86-LABEL: is_plus_finite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; X86-NEXT: setb %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_plus_finite_f: +; X86-SDAGISEL: # %bb.0: # 
%entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setb %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_plus_finite_f: ; X64: # %bb.0: # %entry @@ -396,6 +652,23 @@ define i1 @is_plus_finite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setb %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_plus_finite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_plus_finite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" ret i1 %0 @@ -418,6 +691,11 @@ define i1 @isnone_d(double %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: xorl %eax, %eax ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isnone_d: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0) ret i1 %0 @@ -440,6 +718,11 @@ define i1 @isany_d(double %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: movb $1, %al ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isany_d: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movb $1, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023) ret i1 %0 diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 4cde581..cdf6bdd 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -4765,6 +4765,33 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32 } declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>) +define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1} +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-NEXT: retq +; +; X86-LABEL: pr163023: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kxnorw %k0, %k0, %k1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl + %addr.p = ptrtoint ptr %a0 to i64 + %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 + %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer + %ofs = sext <16 x i32> %a1 to <16 x i64> + %addr = add nuw <16 x i64> %addr.splat, %ofs + %ptr = inttoptr <16 x i64> %addr to <16 x ptr> + %gather = tail call fastcc <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + ret <16 x i32> %gather +} + ; ; PR45906 ; This used to cause fast-isel to generate bad copy instructions that would diff --git a/llvm/test/CodeGen/X86/pr160612.ll b/llvm/test/CodeGen/X86/pr160612.ll new file mode 100644 index 0000000..6572c42 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr160612.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s + +; Test for issue #160612: OR conditions in branches should use multiple branches +; instead of materializing booleans with SETCC when no special optimizations apply. + +declare void @subroutine_foo() +declare void @subroutine_bar() + +; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH +; instead of using two conditional branches directly. +define void @func_a(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + %cmp1 = icmp eq i32 %y, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end + +if.else: + tail call void @subroutine_bar() + br label %if.end + +if.end: + ret void +} + +; Reference implementation that already generated optimal code. +; This should continue to generate the same optimal code. +define void @func_b(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.else3 +; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end4 + +if.else: + %cmp1 = icmp eq i32 %y, 0 + br i1 %cmp1, label %if.then2, label %if.else3 + +if.then2: + tail call void @subroutine_foo() + br label %if.end4 + +if.else3: + tail call void @subroutine_bar() + br label %if.end4 + +if.end4: + ret void +} diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 69abf6e..d018c53 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1493,15 +1493,23 @@ define i1 @allbits_i128_load_arg(ptr %w) { } define i1 @anybits_i256_load_arg(ptr %w) { -; ANY-LABEL: anybits_i256_load_arg: -; ANY: # %bb.0: -; ANY-NEXT: movq (%rdi), %rax -; ANY-NEXT: movq 8(%rdi), %rcx -; ANY-NEXT: orq 24(%rdi), %rcx -; ANY-NEXT: orq 16(%rdi), %rax -; ANY-NEXT: orq %rcx, %rax -; ANY-NEXT: setne %al -; ANY-NEXT: retq +; SSE-LABEL: anybits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: orq 24(%rdi), %rcx +; SSE-NEXT: orq 16(%rdi), %rax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVXANY-LABEL: anybits_i256_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqu (%rdi), %ymm0 +; AVXANY-NEXT: vptest %ymm0, %ymm0 +; AVXANY-NEXT: setne %al +; AVXANY-NEXT: vzeroupper +; AVXANY-NEXT: retq %ld = load i256, ptr %w %cmp = icmp ne i256 %ld, 0 ret i1 %cmp @@ -1552,21 +1560,30 @@ define i1 @allbits_i256_load_arg(ptr %w) { } define i1 @anybits_i512_load_arg(ptr %w) { -; ANY-LABEL: anybits_i512_load_arg: -; ANY: # %bb.0: -; ANY-NEXT: movq 16(%rdi), %rax -; ANY-NEXT: movq (%rdi), %rcx -; ANY-NEXT: movq 8(%rdi), %rdx -; ANY-NEXT: movq 24(%rdi), %rsi -; ANY-NEXT: orq 56(%rdi), %rsi -; ANY-NEXT: orq 40(%rdi), %rdx -; ANY-NEXT: orq 
%rsi, %rdx -; ANY-NEXT: orq 48(%rdi), %rax -; ANY-NEXT: orq 32(%rdi), %rcx -; ANY-NEXT: orq %rax, %rcx -; ANY-NEXT: orq %rdx, %rcx -; ANY-NEXT: setne %al -; ANY-NEXT: retq +; NO512-LABEL: anybits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: orq 56(%rdi), %rsi +; NO512-NEXT: orq 40(%rdi), %rdx +; NO512-NEXT: orq %rsi, %rdx +; NO512-NEXT: orq 48(%rdi), %rax +; NO512-NEXT: orq 32(%rdi), %rcx +; NO512-NEXT: orq %rax, %rcx +; NO512-NEXT: orq %rdx, %rcx +; NO512-NEXT: setne %al +; NO512-NEXT: retq +; +; AVX512-LABEL: anybits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: setne %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %ld = load i512, ptr %w %cmp = icmp ne i512 %ld, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index b2064b1..02d4d88 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -181,40 +181,38 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; CHECK-LABEL: segmentedStack: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %gs:816, %rsp -; CHECK-NEXT: jbe LBB3_6 +; CHECK-NEXT: jbe LBB3_7 ; CHECK-NEXT: LBB3_1: ## %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: sete %al -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: sete %al -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB3_4 -; CHECK-NEXT: ## %bb.2: ## %if.end4.i +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.2: ## %entry +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.3: ## %if.end4.i ; CHECK-NEXT: movq 8(%rdi), %rdx ; CHECK-NEXT: cmpq 8(%rsi), %rdx -; CHECK-NEXT: jne LBB3_5 -; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i +; CHECK-NEXT: jne LBB3_6 +; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i ; CHECK-NEXT: movq (%rsi), %rsi ; CHECK-NEXT: movq (%rdi), %rdi ; CHECK-NEXT: callq _memcmp ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: sete %al -; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit +; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_5: +; CHECK-NEXT: LBB3_6: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_6: +; CHECK-NEXT: LBB3_7: ; CHECK-NEXT: movl $8, %r10d ; CHECK-NEXT: movl $0, %r11d ; CHECK-NEXT: callq ___morestack @@ -224,43 +222,41 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; NOCOMPACTUNWIND-LABEL: segmentedStack: ; NOCOMPACTUNWIND: # %bb.0: ; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp -; NOCOMPACTUNWIND-NEXT: jbe .LBB3_6 +; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7 ; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry ; NOCOMPACTUNWIND-NEXT: pushq %rax ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 -; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi -; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi -; NOCOMPACTUNWIND-NEXT: sete %cl -; NOCOMPACTUNWIND-NEXT: orb %al, %cl ; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax ; 
NOCOMPACTUNWIND-NEXT: orq %rsi, %rax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testb %cl, %cl -; NOCOMPACTUNWIND-NEXT: jne .LBB3_4 -; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i +; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry +; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i ; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx ; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx -; NOCOMPACTUNWIND-NEXT: jne .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i +; NOCOMPACTUNWIND-NEXT: jne .LBB3_6 +; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i ; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi ; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi ; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT ; NOCOMPACTUNWIND-NEXT: testl %eax, %eax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit +; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_5: +; NOCOMPACTUNWIND-NEXT: .LBB3_6: ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 ; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_6: +; NOCOMPACTUNWIND-NEXT: .LBB3_7: ; NOCOMPACTUNWIND-NEXT: movl $8, %r10d ; NOCOMPACTUNWIND-NEXT: movl $0, %r11d ; NOCOMPACTUNWIND-NEXT: callq __morestack diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll index ae8b2b3..2a7216f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll @@ -23,7 +23,7 @@ declare i32 @dummyPersonality(...) 
define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr @__CxxFrameHandler3 { ; CHECK-INLINE-LABEL: define void @FuncletPersonality( -; CHECK-INLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__CxxFrameHandler3 { +; CHECK-INLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { ; CHECK-INLINE-NEXT: entry: ; CHECK-INLINE-NEXT: [[TMP0:%.*]] = alloca i64, align 32 ; CHECK-INLINE-NEXT: store i64 0, ptr [[TMP0]], align 8 @@ -37,33 +37,26 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: br label [[TMP6]] ; CHECK-INLINE: 6: ; CHECK-INLINE-NEXT: [[TMP7:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-INLINE-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-INLINE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] -; CHECK-INLINE: 9: +; CHECK-INLINE: 10: ; CHECK-INLINE-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 8544, align 32 -; CHECK-INLINE-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-INLINE-NEXT: br label [[TMP11]] ; CHECK-INLINE: 11: -; CHECK-INLINE-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP7]], [[TMP6]] ], [ [[TMP10]], [[TMP9]] ] -; CHECK-INLINE-NEXT: store i64 [[TMP12]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 -; CHECK-INLINE-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 32 -; CHECK-INLINE-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr -; CHECK-INLINE-NEXT: [[TMP15:%.*]] = add i64 [[TMP12]], 8480 -; CHECK-INLINE-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr -; CHECK-INLINE-NEXT: [[TMP17:%.*]] = add i64 [[TMP12]], 8496 -; CHECK-INLINE-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr -; CHECK-INLINE-NEXT: [[TMP19:%.*]] = add i64 [[TMP12]], 8512 -; CHECK-INLINE-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr -; CHECK-INLINE-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], 8528 -; CHECK-INLINE-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr -; CHECK-INLINE-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-INLINE-NEXT: [[TMP23:%.*]] = phi ptr [ [[TMP10]], [[TMP6]] ], [ [[MYALLOCA]], [[TMP9]] ] +; CHECK-INLINE-NEXT: store ptr [[TMP23]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; CHECK-INLINE-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP23]], i64 32 +; CHECK-INLINE-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8480 +; CHECK-INLINE-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8496 +; CHECK-INLINE-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8512 +; CHECK-INLINE-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8528 ; CHECK-INLINE-NEXT: store i64 1102416563, ptr [[TMP23]], align 8 -; CHECK-INLINE-NEXT: [[TMP24:%.*]] = add i64 [[TMP12]], 8 -; CHECK-INLINE-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr +; CHECK-INLINE-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8 ; CHECK-INLINE-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP25]], align 8 -; CHECK-INLINE-NEXT: [[TMP26:%.*]] = add i64 [[TMP12]], 16 -; CHECK-INLINE-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr +; CHECK-INLINE-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP23]], i64 16 ; CHECK-INLINE-NEXT: store i64 ptrtoint (ptr @FuncletPersonality to i64), ptr [[TMP27]], align 8 +; CHECK-INLINE-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP23]] to i64 ; CHECK-INLINE-NEXT: [[TMP28:%.*]] = lshr i64 [[TMP12]], 3 ; CHECK-INLINE-NEXT: [[TMP29:%.*]] = add i64 [[TMP28]], [[TMP1]] ; CHECK-INLINE-NEXT: call void 
@__asan_set_shadow_f1(i64 [[TMP29]], i64 4) @@ -87,21 +80,22 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_f3(i64 [[TMP38]], i64 1) ; CHECK-INLINE-NEXT: [[TMP39:%.*]] = add i64 [[TMP29]], 1066 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_04(i64 [[TMP39]], i64 1) +; CHECK-INLINE-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP22]] to i64 ; CHECK-INLINE-NEXT: [[TMP40:%.*]] = lshr i64 [[TMP21]], 3 ; CHECK-INLINE-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], [[TMP1]] ; CHECK-INLINE-NEXT: [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr ; CHECK-INLINE-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1 ; CHECK-INLINE-NEXT: [[TMP44:%.*]] = icmp ne i8 [[TMP43]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP44]], label [[TMP45:%.*]], label [[TMP50:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK-INLINE: 45: +; CHECK-INLINE: 39: ; CHECK-INLINE-NEXT: [[TMP46:%.*]] = and i64 [[TMP21]], 7 ; CHECK-INLINE-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8 ; CHECK-INLINE-NEXT: [[TMP48:%.*]] = icmp sge i8 [[TMP47]], [[TMP43]] ; CHECK-INLINE-NEXT: br i1 [[TMP48]], label [[TMP49:%.*]], label [[TMP50]] -; CHECK-INLINE: 49: +; CHECK-INLINE: 43: ; CHECK-INLINE-NEXT: call void @__asan_report_store1(i64 [[TMP21]]) #[[ATTR7:[0-9]+]] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 50: +; CHECK-INLINE: 44: ; CHECK-INLINE-NEXT: store volatile i8 0, ptr [[TMP22]], align 1 ; CHECK-INLINE-NEXT: [[TMP51:%.*]] = add i64 [[TMP29]], 1066 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_f8(i64 [[TMP51]], i64 1) @@ -125,10 +119,10 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1 ; CHECK-INLINE-NEXT: [[TMP66:%.*]] = icmp ne i8 [[TMP65]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP66]], label [[TMP67:%.*]], label [[TMP68:%.*]] -; CHECK-INLINE: 67: +; CHECK-INLINE: 61: ; CHECK-INLINE-NEXT: call void @__asan_report_store8(i64 [[TMP59]]) #[[ATTR7]] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 68: +; CHECK-INLINE: 62: ; CHECK-INLINE-NEXT: store volatile i64 0, ptr [[TMP61]], align 8 ; CHECK-INLINE-NEXT: [[TMPCOPYI64:%.*]] = load i64, ptr [[TMP61]], align 8 ; CHECK-INLINE-NEXT: [[TMP69:%.*]] = and i64 [[TMPCOPYI64]], 31 @@ -150,15 +144,15 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1 ; CHECK-INLINE-NEXT: [[TMP84:%.*]] = icmp ne i8 [[TMP83]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP90:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 85: +; CHECK-INLINE: 79: ; CHECK-INLINE-NEXT: [[TMP86:%.*]] = and i64 [[TMP77]], 7 ; CHECK-INLINE-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i8 ; CHECK-INLINE-NEXT: [[TMP88:%.*]] = icmp sge i8 [[TMP87]], [[TMP83]] ; CHECK-INLINE-NEXT: br i1 [[TMP88]], label [[TMP89:%.*]], label [[TMP90]] -; CHECK-INLINE: 89: +; CHECK-INLINE: 83: ; CHECK-INLINE-NEXT: call void @__asan_report_store1(i64 [[TMP77]]) #[[ATTR7]] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 90: +; CHECK-INLINE: 84: ; CHECK-INLINE-NEXT: store volatile i8 0, ptr [[TMP79]], align 1 ; CHECK-INLINE-NEXT: invoke void @MayThrowFunc() ; CHECK-INLINE-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[EHCLEANUP:%.*]] @@ -170,15 +164,15 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: store i64 1172321806, ptr [[TMP23]], align 8 ; CHECK-INLINE-NEXT: [[TMP93:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-INLINE-NEXT: br i1 
[[TMP93]], label [[TMP94:%.*]], label [[TMP95:%.*]] -; CHECK-INLINE: 94: +; CHECK-INLINE: 88: ; CHECK-INLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8544) ; CHECK-INLINE-NEXT: br label [[TMP97:%.*]] -; CHECK-INLINE: 95: +; CHECK-INLINE: 89: ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP29]], i64 4) ; CHECK-INLINE-NEXT: [[TMP96:%.*]] = add i64 [[TMP29]], 1028 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP96]], i64 40) ; CHECK-INLINE-NEXT: br label [[TMP97]] -; CHECK-INLINE: 97: +; CHECK-INLINE: 91: ; CHECK-INLINE-NEXT: ret void ; CHECK-INLINE: ehcleanup: ; CHECK-INLINE-NEXT: [[TMP98:%.*]] = cleanuppad within none [] @@ -189,23 +183,27 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP102:%.*]] = load i8, ptr [[TMP101]], align 1 ; CHECK-INLINE-NEXT: [[TMP103:%.*]] = icmp ne i8 [[TMP102]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP103]], label [[TMP104:%.*]], label [[TMP109:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 104: +; CHECK-INLINE: 98: ; CHECK-INLINE-NEXT: [[TMP105:%.*]] = and i64 [[TMP54]], 7 ; CHECK-INLINE-NEXT: [[TMP106:%.*]] = trunc i64 [[TMP105]] to i8 ; CHECK-INLINE-NEXT: [[TMP107:%.*]] = icmp sge i8 [[TMP106]], [[TMP102]] ; CHECK-INLINE-NEXT: br i1 [[TMP107]], label [[TMP108:%.*]], label [[TMP109]] -; CHECK-INLINE: 108: +; CHECK-INLINE: 102: ; CHECK-INLINE-NEXT: call void @__asan_report_store1(i64 [[TMP54]]) #[[ATTR7]] [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 109: +; CHECK-INLINE: 103: ; CHECK-INLINE-NEXT: store volatile i8 0, ptr [[TMP56]], align 1 ; CHECK-INLINE-NEXT: call void @__asan_poison_stack_memory(i64 [[TMP54]], i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: call void @DeInit(ptr [[TMP14]]) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP110:%.*]] = call ptr @__asan_memset(ptr [[TMP16]], i32 0, i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP111:%.*]] = call ptr @__asan_memcpy(ptr [[TMP18]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP112:%.*]] = call ptr @__asan_memmove(ptr [[TMP20]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP98]]) ] +; CHECK-INLINE-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-INLINE-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP18]] to i64 ; CHECK-INLINE-NEXT: call void @__sanitizer_ptr_cmp(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP98]]) ] -; CHECK-INLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP98]]) ] +; CHECK-INLINE-NEXT: [[ADDR1:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-INLINE-NEXT: [[ADDR2:%.*]] = ptrtoint ptr [[TMP18]] to i64 +; CHECK-INLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[ADDR1]], i64 [[ADDR2]]) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP113:%.*]] = ptrtoint ptr [[PTRPARAM]] to i64 ; CHECK-INLINE-NEXT: [[TMP114:%.*]] = add i64 [[TMP113]], 7 ; CHECK-INLINE-NEXT: [[TMP115:%.*]] = inttoptr i64 [[TMP114]] to ptr @@ -216,27 +214,27 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1 ; CHECK-INLINE-NEXT: [[TMP121:%.*]] = icmp ne i8 [[TMP120]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP121]], label [[TMP122:%.*]], label [[TMP127:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 122: +; CHECK-INLINE: 118: ; CHECK-INLINE-NEXT: [[TMP123:%.*]] = and i64 [[TMP116]], 7 ; CHECK-INLINE-NEXT: [[TMP124:%.*]] = trunc i64 [[TMP123]] to i8 ; CHECK-INLINE-NEXT: [[TMP125:%.*]] = 
icmp sge i8 [[TMP124]], [[TMP120]] ; CHECK-INLINE-NEXT: br i1 [[TMP125]], label [[TMP126:%.*]], label [[TMP127]] -; CHECK-INLINE: 126: +; CHECK-INLINE: 122: ; CHECK-INLINE-NEXT: call void @__asan_report_store_n(i64 [[TMP116]], i64 8) #[[ATTR7]] [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 127: +; CHECK-INLINE: 123: ; CHECK-INLINE-NEXT: [[TMP128:%.*]] = lshr i64 [[TMP114]], 3 ; CHECK-INLINE-NEXT: [[TMP129:%.*]] = add i64 [[TMP128]], [[TMP1]] ; CHECK-INLINE-NEXT: [[TMP130:%.*]] = inttoptr i64 [[TMP129]] to ptr ; CHECK-INLINE-NEXT: [[TMP131:%.*]] = load i8, ptr [[TMP130]], align 1 ; CHECK-INLINE-NEXT: [[TMP132:%.*]] = icmp ne i8 [[TMP131]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP132]], label [[TMP133:%.*]], label [[EHEXIT:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 133: +; CHECK-INLINE: 129: ; CHECK-INLINE-NEXT: [[TMP134:%.*]] = and i64 [[TMP114]], 7 ; CHECK-INLINE-NEXT: [[TMP135:%.*]] = trunc i64 [[TMP134]] to i8 ; CHECK-INLINE-NEXT: [[TMP136:%.*]] = icmp sge i8 [[TMP135]], [[TMP131]] ; CHECK-INLINE-NEXT: br i1 [[TMP136]], label [[TMP137:%.*]], label [[EHEXIT]] -; CHECK-INLINE: 137: +; CHECK-INLINE: 133: ; CHECK-INLINE-NEXT: call void @__asan_report_store_n(i64 [[TMP114]], i64 8) #[[ATTR7]] [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: unreachable ; CHECK-INLINE: ehexit: @@ -249,19 +247,19 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: store i64 1172321806, ptr [[TMP23]], align 8 ; CHECK-INLINE-NEXT: [[TMP142:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP142]], label [[TMP143:%.*]], label [[TMP144:%.*]] -; CHECK-INLINE: 143: +; CHECK-INLINE: 139: ; CHECK-INLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8544) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: br label [[TMP146:%.*]] -; CHECK-INLINE: 144: +; CHECK-INLINE: 140: ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP29]], i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP145:%.*]] = add i64 [[TMP29]], 1028 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP145]], i64 40) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: br label [[TMP146]] -; CHECK-INLINE: 146: +; CHECK-INLINE: 142: ; CHECK-INLINE-NEXT: cleanupret from [[TMP98]] unwind to caller ; ; CHECK-OUTLINE-LABEL: define void @FuncletPersonality( -; CHECK-OUTLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__CxxFrameHandler3 { +; CHECK-OUTLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { ; CHECK-OUTLINE-NEXT: entry: ; CHECK-OUTLINE-NEXT: [[TMP0:%.*]] = alloca i64, align 32 ; CHECK-OUTLINE-NEXT: store i64 0, ptr [[TMP0]], align 8 @@ -275,37 +273,28 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: br label [[TMP6]] ; CHECK-OUTLINE: 6: ; CHECK-OUTLINE-NEXT: [[TMP7:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-OUTLINE-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-OUTLINE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 ; CHECK-OUTLINE-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] -; CHECK-OUTLINE: 9: +; CHECK-OUTLINE: 10: ; CHECK-OUTLINE-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 8608, align 32 -; CHECK-OUTLINE-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-OUTLINE-NEXT: br label [[TMP11]] ; CHECK-OUTLINE: 11: -; CHECK-OUTLINE-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP7]], [[TMP6]] ], [ [[TMP10]], [[TMP9]] ] -; CHECK-OUTLINE-NEXT: store i64 [[TMP12]], ptr 
[[ASAN_LOCAL_STACK_BASE]], align 8 -; CHECK-OUTLINE-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 32 -; CHECK-OUTLINE-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP15:%.*]] = add i64 [[TMP12]], 8480 -; CHECK-OUTLINE-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP17:%.*]] = add i64 [[TMP12]], 8496 -; CHECK-OUTLINE-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP19:%.*]] = add i64 [[TMP12]], 8512 -; CHECK-OUTLINE-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], 8528 -; CHECK-OUTLINE-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP23:%.*]] = add i64 [[TMP12]], 8544 -; CHECK-OUTLINE-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP25:%.*]] = add i64 [[TMP12]], 8560 -; CHECK-OUTLINE-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-OUTLINE-NEXT: [[TMP27:%.*]] = phi ptr [ [[TMP10]], [[TMP6]] ], [ [[MYALLOCA]], [[TMP9]] ] +; CHECK-OUTLINE-NEXT: store ptr [[TMP27]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; CHECK-OUTLINE-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP27]], i64 32 +; CHECK-OUTLINE-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8480 +; CHECK-OUTLINE-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8496 +; CHECK-OUTLINE-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8512 +; CHECK-OUTLINE-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8528 +; CHECK-OUTLINE-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8544 +; CHECK-OUTLINE-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8560 ; CHECK-OUTLINE-NEXT: store i64 1102416563, ptr [[TMP27]], align 8 -; CHECK-OUTLINE-NEXT: [[TMP28:%.*]] = add i64 [[TMP12]], 8 -; CHECK-OUTLINE-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr +; CHECK-OUTLINE-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8 ; CHECK-OUTLINE-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP29]], align 8 -; CHECK-OUTLINE-NEXT: [[TMP30:%.*]] = add i64 [[TMP12]], 16 -; CHECK-OUTLINE-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP30]] to ptr +; CHECK-OUTLINE-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP27]], i64 16 ; CHECK-OUTLINE-NEXT: store i64 ptrtoint (ptr @FuncletPersonality to i64), ptr [[TMP31]], align 8 +; CHECK-OUTLINE-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP27]] to i64 ; CHECK-OUTLINE-NEXT: [[TMP32:%.*]] = lshr i64 [[TMP12]], 3 ; CHECK-OUTLINE-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[TMP1]] ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_f1(i64 [[TMP33]], i64 4) @@ -335,10 +324,12 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_f3(i64 [[TMP45]], i64 5) ; CHECK-OUTLINE-NEXT: [[TMP46:%.*]] = add i64 [[TMP33]], 1066 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_04(i64 [[TMP46]], i64 1) +; CHECK-OUTLINE-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP22]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_store1(i64 [[TMP21]]) ; CHECK-OUTLINE-NEXT: store volatile i8 0, ptr [[TMP22]], align 1 ; CHECK-OUTLINE-NEXT: [[TMP47:%.*]] = add i64 [[TMP33]], 1066 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_f8(i64 [[TMP47]], i64 1) +; CHECK-OUTLINE-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP26]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_store8(i64 [[TMP25]]) ; CHECK-OUTLINE-NEXT: store volatile i64 0, ptr 
[[TMP26]], align 8 ; CHECK-OUTLINE-NEXT: [[TMPCOPYI64:%.*]] = load i64, ptr [[TMP26]], align 8 @@ -367,22 +358,23 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: store i64 1172321806, ptr [[TMP27]], align 8 ; CHECK-OUTLINE-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-OUTLINE-NEXT: br i1 [[TMP61]], label [[TMP62:%.*]], label [[TMP63:%.*]] -; CHECK-OUTLINE: 62: +; CHECK-OUTLINE: 55: ; CHECK-OUTLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8608) ; CHECK-OUTLINE-NEXT: br label [[TMP66:%.*]] -; CHECK-OUTLINE: 63: +; CHECK-OUTLINE: 56: ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP33]], i64 4) ; CHECK-OUTLINE-NEXT: [[TMP64:%.*]] = add i64 [[TMP33]], 1028 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP64]], i64 42) ; CHECK-OUTLINE-NEXT: [[TMP65:%.*]] = add i64 [[TMP33]], 1071 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP65]], i64 5) ; CHECK-OUTLINE-NEXT: br label [[TMP66]] -; CHECK-OUTLINE: 66: +; CHECK-OUTLINE: 59: ; CHECK-OUTLINE-NEXT: ret void ; CHECK-OUTLINE: ehcleanup: ; CHECK-OUTLINE-NEXT: [[TMP67:%.*]] = cleanuppad within none [] ; CHECK-OUTLINE-NEXT: [[TMP68:%.*]] = add i64 [[TMP33]], 1068 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_04(i64 [[TMP68]], i64 1) [ "funclet"(token [[TMP67]]) ] +; CHECK-OUTLINE-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP24]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_store1(i64 [[TMP23]]) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: store volatile i8 0, ptr [[TMP24]], align 1 ; CHECK-OUTLINE-NEXT: [[TMP69:%.*]] = add i64 [[TMP33]], 1068 @@ -391,8 +383,12 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: [[TMP70:%.*]] = call ptr @__asan_memset(ptr [[TMP16]], i32 0, i64 4) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP71:%.*]] = call ptr @__asan_memcpy(ptr [[TMP18]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP72:%.*]] = call ptr @__asan_memmove(ptr [[TMP20]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP67]]) ] +; CHECK-OUTLINE-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-OUTLINE-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP18]] to i64 ; CHECK-OUTLINE-NEXT: call void @__sanitizer_ptr_cmp(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP67]]) ] -; CHECK-OUTLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP67]]) ] +; CHECK-OUTLINE-NEXT: [[ADDR1:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-OUTLINE-NEXT: [[ADDR2:%.*]] = ptrtoint ptr [[TMP18]] to i64 +; CHECK-OUTLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[ADDR1]], i64 [[ADDR2]]) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP73:%.*]] = ptrtoint ptr [[PTRPARAM]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_storeN(i64 [[TMP73]], i64 8) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: store i64 0, ptr [[PTRPARAM]], align 1 @@ -404,17 +400,17 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: store i64 1172321806, ptr [[TMP27]], align 8 ; CHECK-OUTLINE-NEXT: [[TMP78:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-OUTLINE-NEXT: br i1 [[TMP78]], label [[TMP79:%.*]], label [[TMP80:%.*]] -; CHECK-OUTLINE: 79: +; CHECK-OUTLINE: 75: ; CHECK-OUTLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8608) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: br label [[TMP83:%.*]] -; CHECK-OUTLINE: 80: +; CHECK-OUTLINE: 76: ; CHECK-OUTLINE-NEXT: call 
void @__asan_set_shadow_00(i64 [[TMP33]], i64 4) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP81:%.*]] = add i64 [[TMP33]], 1028 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP81]], i64 42) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP82:%.*]] = add i64 [[TMP33]], 1071 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP82]], i64 5) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: br label [[TMP83]] -; CHECK-OUTLINE: 83: +; CHECK-OUTLINE: 79: ; CHECK-OUTLINE-NEXT: cleanupret from [[TMP67]] unwind to caller ; @@ -487,7 +483,7 @@ nopredecessor: ; Non-Windows personality, ensure no funclet gets attached to asan runtime call. define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @dummyPersonality { ; CHECK-LABEL: define void @OtherPersonality( -; CHECK-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @dummyPersonality { +; CHECK-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @dummyPersonality { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 ; CHECK-NEXT: [[ASAN_LOCAL_STACK_BASE:%.*]] = alloca i64, align 8 @@ -499,25 +495,22 @@ define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @d ; CHECK-NEXT: br label [[TMP5]] ; CHECK: 5: ; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[TMP3]] ] +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP10:%.*]] -; CHECK: 8: +; CHECK: 9: ; CHECK-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP6]], [[TMP5]] ], [ [[TMP9]], [[TMP8]] ] -; CHECK-NEXT: store i64 [[TMP11]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 32 -; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr -; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP14:%.*]] = phi ptr [ [[TMP9]], [[TMP5]] ], [ [[MYALLOCA]], [[TMP8]] ] +; CHECK-NEXT: store ptr [[TMP14]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP14]], i64 32 ; CHECK-NEXT: store i64 1102416563, ptr [[TMP14]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP14]], i64 8 ; CHECK-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack.1 to i64), ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP11]], 16 -; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP14]], i64 16 ; CHECK-NEXT: store i64 ptrtoint (ptr @OtherPersonality to i64), ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP14]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = lshr i64 [[TMP11]], 3 ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], [[TMP0]] ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0 @@ -532,14 +525,14 @@ define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @d ; CHECK-NEXT: store i64 1172321806, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP26:%.*]] -; CHECK: 25: +; CHECK: 22: ; CHECK-NEXT: call void 
@__asan_stack_free_0(i64 [[TMP6]], i64 64) ; CHECK-NEXT: br label [[TMP28:%.*]] -; CHECK: 26: +; CHECK: 23: ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP20]], 0 ; CHECK-NEXT: call void @__asan_set_shadow_00(i64 [[TMP27]], i64 8) ; CHECK-NEXT: br label [[TMP28]] -; CHECK: 28: +; CHECK: 25: ; CHECK-NEXT: ret void ; CHECK: ehcleanup: ; CHECK-NEXT: [[TMP29:%.*]] = cleanuppad within none [] @@ -547,14 +540,14 @@ define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @d ; CHECK-NEXT: store i64 1172321806, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP31]], label [[TMP32:%.*]], label [[TMP33:%.*]] -; CHECK: 32: +; CHECK: 29: ; CHECK-NEXT: call void @__asan_stack_free_0(i64 [[TMP6]], i64 64) ; CHECK-NEXT: br label [[TMP35:%.*]] -; CHECK: 33: +; CHECK: 30: ; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP20]], 0 ; CHECK-NEXT: call void @__asan_set_shadow_00(i64 [[TMP34]], i64 8) ; CHECK-NEXT: br label [[TMP35]] -; CHECK: 35: +; CHECK: 32: ; CHECK-NEXT: cleanupret from [[TMP29]] unwind to caller ; entry: diff --git a/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll b/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll index 3cccabb1..1b00cd8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll @@ -20,24 +20,20 @@ define void @Simple() uwtable sanitize_address { ; NEVER-LABEL: @Simple( ; NEVER-NEXT: entry: ; NEVER-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; NEVER-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; NEVER-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 32 -; NEVER-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr -; NEVER-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP0]] to ptr -; NEVER-NEXT: store i64 1102416563, ptr [[TMP3]], align 8 -; NEVER-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 8 -; NEVER-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; NEVER-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; NEVER-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; NEVER-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; NEVER-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP5]], align 8 -; NEVER-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 16 -; NEVER-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; NEVER-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; NEVER-NEXT: store i64 ptrtoint (ptr @Simple to i64), ptr [[TMP7]], align 8 +; NEVER-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; NEVER-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP0]], 3 ; NEVER-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 2147450880 ; NEVER-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 0 ; NEVER-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr ; NEVER-NEXT: store i64 -868083113472691727, ptr [[TMP11]], align 1 ; NEVER-NEXT: call void @Foo(ptr [[TMP2]]) -; NEVER-NEXT: store i64 1172321806, ptr [[TMP3]], align 8 +; NEVER-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; NEVER-NEXT: [[TMP12:%.*]] = add i64 [[TMP9]], 0 ; NEVER-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr ; NEVER-NEXT: store i64 0, ptr [[TMP13]], align 1 @@ -54,25 +50,22 @@ define void @Simple() uwtable sanitize_address { ; RUNTIME-NEXT: br label [[TMP4]] ; RUNTIME: 4: ; RUNTIME-NEXT: [[TMP5:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[TMP2]] ] +; RUNTIME-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP5]] to ptr ; RUNTIME-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 0 ; RUNTIME-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label 
[[TMP9:%.*]] -; RUNTIME: 7: +; RUNTIME: 8: ; RUNTIME-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; RUNTIME-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; RUNTIME-NEXT: br label [[TMP9]] ; RUNTIME: 9: -; RUNTIME-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP5]], [[TMP4]] ], [ [[TMP8]], [[TMP7]] ] -; RUNTIME-NEXT: store i64 [[TMP10]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 -; RUNTIME-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 32 -; RUNTIME-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr -; RUNTIME-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP10]] to ptr +; RUNTIME-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP8]], [[TMP4]] ], [ [[MYALLOCA]], [[TMP7]] ] +; RUNTIME-NEXT: store ptr [[TMP13]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; RUNTIME-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP13]], i64 32 ; RUNTIME-NEXT: store i64 1102416563, ptr [[TMP13]], align 8 -; RUNTIME-NEXT: [[TMP14:%.*]] = add i64 [[TMP10]], 8 -; RUNTIME-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; RUNTIME-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8 ; RUNTIME-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP15]], align 8 -; RUNTIME-NEXT: [[TMP16:%.*]] = add i64 [[TMP10]], 16 -; RUNTIME-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; RUNTIME-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; RUNTIME-NEXT: store i64 ptrtoint (ptr @Simple to i64), ptr [[TMP17]], align 8 +; RUNTIME-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP13]] to i64 ; RUNTIME-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP10]], 3 ; RUNTIME-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 2147450880 ; RUNTIME-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 0 @@ -82,47 +75,43 @@ define void @Simple() uwtable sanitize_address { ; RUNTIME-NEXT: store i64 1172321806, ptr [[TMP13]], align 8 ; RUNTIME-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP5]], 0 ; RUNTIME-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP30:%.*]] -; RUNTIME: 23: +; RUNTIME: 20: ; RUNTIME-NEXT: [[TMP24:%.*]] = add i64 [[TMP19]], 0 ; RUNTIME-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr ; RUNTIME-NEXT: store i64 -723401728380766731, ptr [[TMP25]], align 1 -; RUNTIME-NEXT: [[TMP26:%.*]] = add i64 [[TMP5]], 56 -; RUNTIME-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr +; RUNTIME-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP8]], i64 56 ; RUNTIME-NEXT: [[TMP28:%.*]] = load i64, ptr [[TMP27]], align 8 ; RUNTIME-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr ; RUNTIME-NEXT: store i8 0, ptr [[TMP29]], align 1 ; RUNTIME-NEXT: br label [[TMP33:%.*]] -; RUNTIME: 30: +; RUNTIME: 26: ; RUNTIME-NEXT: [[TMP31:%.*]] = add i64 [[TMP19]], 0 ; RUNTIME-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr ; RUNTIME-NEXT: store i64 0, ptr [[TMP32]], align 1 ; RUNTIME-NEXT: br label [[TMP33]] -; RUNTIME: 33: +; RUNTIME: 29: ; RUNTIME-NEXT: ret void ; ; ALWAYS-LABEL: @Simple( ; ALWAYS-NEXT: entry: ; ALWAYS-NEXT: [[ASAN_LOCAL_STACK_BASE:%.*]] = alloca i64, align 8 ; ALWAYS-NEXT: [[TMP0:%.*]] = call i64 @__asan_stack_malloc_always_0(i64 64) +; ALWAYS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP0]] to ptr ; ALWAYS-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 0 ; ALWAYS-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] -; ALWAYS: 2: +; ALWAYS: 3: ; ALWAYS-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; ALWAYS-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; ALWAYS-NEXT: br label [[TMP4]] ; ALWAYS: 4: -; ALWAYS-NEXT: [[TMP5:%.*]] = phi i64 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP3]], [[TMP2]] ] -; ALWAYS-NEXT: store i64 [[TMP5]], ptr 
[[ASAN_LOCAL_STACK_BASE]], align 8 -; ALWAYS-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 32 -; ALWAYS-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; ALWAYS-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ALWAYS-NEXT: [[TMP8:%.*]] = phi ptr [ [[TMP3]], [[ENTRY:%.*]] ], [ [[MYALLOCA]], [[TMP2]] ] +; ALWAYS-NEXT: store ptr [[TMP8]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; ALWAYS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP8]], i64 32 ; ALWAYS-NEXT: store i64 1102416563, ptr [[TMP8]], align 8 -; ALWAYS-NEXT: [[TMP9:%.*]] = add i64 [[TMP5]], 8 -; ALWAYS-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; ALWAYS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i64 8 ; ALWAYS-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP10]], align 8 -; ALWAYS-NEXT: [[TMP11:%.*]] = add i64 [[TMP5]], 16 -; ALWAYS-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; ALWAYS-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; ALWAYS-NEXT: store i64 ptrtoint (ptr @Simple to i64), ptr [[TMP12]], align 8 +; ALWAYS-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP8]] to i64 ; ALWAYS-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP5]], 3 ; ALWAYS-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 2147450880 ; ALWAYS-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], 0 @@ -132,22 +121,21 @@ define void @Simple() uwtable sanitize_address { ; ALWAYS-NEXT: store i64 1172321806, ptr [[TMP8]], align 8 ; ALWAYS-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP0]], 0 ; ALWAYS-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP25:%.*]] -; ALWAYS: 18: +; ALWAYS: 15: ; ALWAYS-NEXT: [[TMP19:%.*]] = add i64 [[TMP14]], 0 ; ALWAYS-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr ; ALWAYS-NEXT: store i64 -723401728380766731, ptr [[TMP20]], align 1 -; ALWAYS-NEXT: [[TMP21:%.*]] = add i64 [[TMP0]], 56 -; ALWAYS-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; ALWAYS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP3]], i64 56 ; ALWAYS-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP22]], align 8 ; ALWAYS-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr ; ALWAYS-NEXT: store i8 0, ptr [[TMP24]], align 1 ; ALWAYS-NEXT: br label [[TMP28:%.*]] -; ALWAYS: 25: +; ALWAYS: 21: ; ALWAYS-NEXT: [[TMP26:%.*]] = add i64 [[TMP14]], 0 ; ALWAYS-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr ; ALWAYS-NEXT: store i64 0, ptr [[TMP27]], align 1 ; ALWAYS-NEXT: br label [[TMP28]] -; ALWAYS: 28: +; ALWAYS: 24: ; ALWAYS-NEXT: ret void ; entry: @@ -160,17 +148,13 @@ define void @Huge() uwtable sanitize_address { ; CHECK-LABEL: @Huge( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 100288, align 32 -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 32 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr -; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP0]] to ptr -; CHECK-NEXT: store i64 1102416563, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; CHECK-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; CHECK-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack.1 to i64), ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; CHECK-NEXT: store i64 ptrtoint (ptr @Huge to i64), ptr [[TMP7]], 
align 8 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 2147450880 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 0 @@ -189,7 +173,7 @@ define void @Huge() uwtable sanitize_address { ; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr ; CHECK-NEXT: store i64 -868082074056920077, ptr [[TMP19]], align 1 ; CHECK-NEXT: call void @Foo(ptr [[TMP2]]) -; CHECK-NEXT: store i64 1172321806, ptr [[TMP3]], align 8 +; CHECK-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP9]], 0 ; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr ; CHECK-NEXT: store i32 0, ptr [[TMP21]], align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll index d1e0180..82e114e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll @@ -16,17 +16,13 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP1:%.*]] = alloca i64, align 32 ; CHECK-DEFAULT-NEXT: store i64 0, ptr [[TMP1]], align 8 ; CHECK-DEFAULT-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; CHECK-DEFAULT-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; CHECK-DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 32 -; CHECK-DEFAULT-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-DEFAULT-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP2]] to ptr -; CHECK-DEFAULT-NEXT: store i64 1102416563, ptr [[TMP5]], align 8 -; CHECK-DEFAULT-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 8 -; CHECK-DEFAULT-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; CHECK-DEFAULT-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; CHECK-DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; CHECK-DEFAULT-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP7]], align 8 -; CHECK-DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 16 -; CHECK-DEFAULT-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; CHECK-DEFAULT-NEXT: store i64 ptrtoint (ptr @lifetime to i64), ptr [[TMP9]], align 8 +; CHECK-DEFAULT-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-DEFAULT-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP2]], 3 ; CHECK-DEFAULT-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 2147450880 ; CHECK-DEFAULT-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 @@ -41,16 +37,16 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr ; CHECK-DEFAULT-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP20]], 0 -; CHECK-DEFAULT-NEXT: br i1 [[TMP21]], label %[[BB22:.*]], label %[[BB27:.*]], !prof [[PROF1:![0-9]+]] -; CHECK-DEFAULT: [[BB22]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP21]], label %[[BB18:.*]], label %[[BB23:.*]], !prof [[PROF1:![0-9]+]] +; CHECK-DEFAULT: [[BB18]]: ; CHECK-DEFAULT-NEXT: [[TMP23:%.*]] = and i64 [[TMP16]], 7 ; CHECK-DEFAULT-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i8 ; CHECK-DEFAULT-NEXT: [[TMP25:%.*]] = icmp sge i8 [[TMP24]], [[TMP20]] -; CHECK-DEFAULT-NEXT: br i1 [[TMP25]], label %[[BB26:.*]], label %[[BB27]] -; CHECK-DEFAULT: [[BB26]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP25]], label %[[BB22:.*]], label %[[BB23]] +; CHECK-DEFAULT: [[BB22]]: ; 
CHECK-DEFAULT-NEXT: call void @__asan_report_store1(i64 [[TMP16]]) #[[ATTR4:[0-9]+]] ; CHECK-DEFAULT-NEXT: unreachable -; CHECK-DEFAULT: [[BB27]]: +; CHECK-DEFAULT: [[BB23]]: ; CHECK-DEFAULT-NEXT: store volatile i8 0, ptr [[TMP4]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP28:%.*]] = add i64 [[TMP11]], 4 ; CHECK-DEFAULT-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr @@ -73,16 +69,16 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr ; CHECK-DEFAULT-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP41]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP43:%.*]] = icmp ne i8 [[TMP42]], 0 -; CHECK-DEFAULT-NEXT: br i1 [[TMP43]], label %[[BB44:.*]], label %[[BB49:.*]], !prof [[PROF1]] -; CHECK-DEFAULT: [[BB44]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP43]], label %[[BB40:.*]], label %[[BB45:.*]], !prof [[PROF1]] +; CHECK-DEFAULT: [[BB40]]: ; CHECK-DEFAULT-NEXT: [[TMP45:%.*]] = and i64 [[TMP38]], 7 ; CHECK-DEFAULT-NEXT: [[TMP46:%.*]] = trunc i64 [[TMP45]] to i8 ; CHECK-DEFAULT-NEXT: [[TMP47:%.*]] = icmp sge i8 [[TMP46]], [[TMP42]] -; CHECK-DEFAULT-NEXT: br i1 [[TMP47]], label %[[BB48:.*]], label %[[BB49]] -; CHECK-DEFAULT: [[BB48]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP47]], label %[[BB44:.*]], label %[[BB45]] +; CHECK-DEFAULT: [[BB44]]: ; CHECK-DEFAULT-NEXT: call void @__asan_report_store1(i64 [[TMP38]]) #[[ATTR4]] ; CHECK-DEFAULT-NEXT: unreachable -; CHECK-DEFAULT: [[BB49]]: +; CHECK-DEFAULT: [[BB45]]: ; CHECK-DEFAULT-NEXT: store volatile i8 0, ptr [[TMP36]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP50:%.*]] = ptrtoint ptr [[TMP36]] to i64 ; CHECK-DEFAULT-NEXT: call void @__asan_poison_stack_memory(i64 [[TMP50]], i64 40) @@ -95,16 +91,16 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr ; CHECK-DEFAULT-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP56]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP58:%.*]] = icmp ne i8 [[TMP57]], 0 -; CHECK-DEFAULT-NEXT: br i1 [[TMP58]], label %[[BB59:.*]], label %[[BB64:.*]], !prof [[PROF1]] -; CHECK-DEFAULT: [[BB59]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP58]], label %[[BB55:.*]], label %[[BB60:.*]], !prof [[PROF1]] +; CHECK-DEFAULT: [[BB55]]: ; CHECK-DEFAULT-NEXT: [[TMP60:%.*]] = and i64 [[TMP53]], 7 ; CHECK-DEFAULT-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i8 ; CHECK-DEFAULT-NEXT: [[TMP62:%.*]] = icmp sge i8 [[TMP61]], [[TMP57]] -; CHECK-DEFAULT-NEXT: br i1 [[TMP62]], label %[[BB63:.*]], label %[[BB64]] -; CHECK-DEFAULT: [[BB63]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP62]], label %[[BB59:.*]], label %[[BB60]] +; CHECK-DEFAULT: [[BB59]]: ; CHECK-DEFAULT-NEXT: call void @__asan_report_store1(i64 [[TMP53]]) #[[ATTR4]] ; CHECK-DEFAULT-NEXT: unreachable -; CHECK-DEFAULT: [[BB64]]: +; CHECK-DEFAULT: [[BB60]]: ; CHECK-DEFAULT-NEXT: store volatile i8 0, ptr [[TMP4]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[TMP11]], 4 ; CHECK-DEFAULT-NEXT: [[TMP66:%.*]] = inttoptr i64 [[TMP65]] to ptr @@ -112,7 +108,7 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP67:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; CHECK-DEFAULT-NEXT: [[TMP68:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-DEFAULT-NEXT: call void @__asan_allocas_unpoison(i64 [[TMP68]], i64 [[TMP67]]) -; CHECK-DEFAULT-NEXT: store i64 1172321806, ptr [[TMP5]], align 8 +; CHECK-DEFAULT-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; CHECK-DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[TMP11]], 0 ; CHECK-DEFAULT-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP69]] to ptr ; CHECK-DEFAULT-NEXT: store i64 0, ptr [[TMP70]], align 1 @@ 
-121,17 +117,13 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-LABEL: define void @lifetime( ; CHECK-NO-DYNAMIC-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NO-DYNAMIC-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; CHECK-NO-DYNAMIC-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; CHECK-NO-DYNAMIC-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 32 -; CHECK-NO-DYNAMIC-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; CHECK-NO-DYNAMIC-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP1]] to ptr -; CHECK-NO-DYNAMIC-NEXT: store i64 1102416563, ptr [[TMP4]], align 8 -; CHECK-NO-DYNAMIC-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 8 -; CHECK-NO-DYNAMIC-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NO-DYNAMIC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; CHECK-NO-DYNAMIC-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; CHECK-NO-DYNAMIC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; CHECK-NO-DYNAMIC-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP6]], align 8 -; CHECK-NO-DYNAMIC-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], 16 -; CHECK-NO-DYNAMIC-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NO-DYNAMIC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; CHECK-NO-DYNAMIC-NEXT: store i64 ptrtoint (ptr @lifetime to i64), ptr [[TMP8]], align 8 +; CHECK-NO-DYNAMIC-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-NO-DYNAMIC-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP1]], 3 ; CHECK-NO-DYNAMIC-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 2147450880 ; CHECK-NO-DYNAMIC-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 @@ -146,16 +138,16 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; CHECK-NO-DYNAMIC-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP19]], 0 -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP20]], label %[[BB21:.*]], label %[[BB26:.*]], !prof [[PROF1:![0-9]+]] -; CHECK-NO-DYNAMIC: [[BB21]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP20]], label %[[BB17:.*]], label %[[BB22:.*]], !prof [[PROF1:![0-9]+]] +; CHECK-NO-DYNAMIC: [[BB17]]: ; CHECK-NO-DYNAMIC-NEXT: [[TMP22:%.*]] = and i64 [[TMP15]], 7 ; CHECK-NO-DYNAMIC-NEXT: [[TMP23:%.*]] = trunc i64 [[TMP22]] to i8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP24:%.*]] = icmp sge i8 [[TMP23]], [[TMP19]] -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP24]], label %[[BB25:.*]], label %[[BB26]] -; CHECK-NO-DYNAMIC: [[BB25]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP24]], label %[[BB21:.*]], label %[[BB22]] +; CHECK-NO-DYNAMIC: [[BB21]]: ; CHECK-NO-DYNAMIC-NEXT: call void @__asan_report_store1(i64 [[TMP15]]) #[[ATTR4:[0-9]+]] ; CHECK-NO-DYNAMIC-NEXT: unreachable -; CHECK-NO-DYNAMIC: [[BB26]]: +; CHECK-NO-DYNAMIC: [[BB22]]: ; CHECK-NO-DYNAMIC-NEXT: store volatile i8 0, ptr [[TMP3]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP27:%.*]] = add i64 [[TMP10]], 4 ; CHECK-NO-DYNAMIC-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr @@ -171,16 +163,16 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr ; CHECK-NO-DYNAMIC-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP36:%.*]] = icmp ne i8 [[TMP35]], 0 -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP36]], label %[[BB37:.*]], label %[[BB42:.*]], !prof [[PROF1]] -; CHECK-NO-DYNAMIC: [[BB37]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP36]], label %[[BB33:.*]], label %[[BB38:.*]], !prof [[PROF1]] +; CHECK-NO-DYNAMIC: [[BB33]]: ; CHECK-NO-DYNAMIC-NEXT: 
[[TMP38:%.*]] = and i64 [[TMP31]], 7 ; CHECK-NO-DYNAMIC-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP38]] to i8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP40:%.*]] = icmp sge i8 [[TMP39]], [[TMP35]] -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42]] -; CHECK-NO-DYNAMIC: [[BB41]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP40]], label %[[BB37:.*]], label %[[BB38]] +; CHECK-NO-DYNAMIC: [[BB37]]: ; CHECK-NO-DYNAMIC-NEXT: call void @__asan_report_store1(i64 [[TMP31]]) #[[ATTR4]] ; CHECK-NO-DYNAMIC-NEXT: unreachable -; CHECK-NO-DYNAMIC: [[BB42]]: +; CHECK-NO-DYNAMIC: [[BB38]]: ; CHECK-NO-DYNAMIC-NEXT: store volatile i8 0, ptr [[ARR]], align 1 ; CHECK-NO-DYNAMIC-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]]) ; CHECK-NO-DYNAMIC-NEXT: [[TMP43:%.*]] = add i64 [[TMP10]], 4 @@ -192,21 +184,21 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr ; CHECK-NO-DYNAMIC-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP50:%.*]] = icmp ne i8 [[TMP49]], 0 -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP50]], label %[[BB51:.*]], label %[[BB56:.*]], !prof [[PROF1]] -; CHECK-NO-DYNAMIC: [[BB51]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP50]], label %[[BB47:.*]], label %[[BB52:.*]], !prof [[PROF1]] +; CHECK-NO-DYNAMIC: [[BB47]]: ; CHECK-NO-DYNAMIC-NEXT: [[TMP52:%.*]] = and i64 [[TMP45]], 7 ; CHECK-NO-DYNAMIC-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP54:%.*]] = icmp sge i8 [[TMP53]], [[TMP49]] -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP54]], label %[[BB55:.*]], label %[[BB56]] -; CHECK-NO-DYNAMIC: [[BB55]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP54]], label %[[BB51:.*]], label %[[BB52]] +; CHECK-NO-DYNAMIC: [[BB51]]: ; CHECK-NO-DYNAMIC-NEXT: call void @__asan_report_store1(i64 [[TMP45]]) #[[ATTR4]] ; CHECK-NO-DYNAMIC-NEXT: unreachable -; CHECK-NO-DYNAMIC: [[BB56]]: +; CHECK-NO-DYNAMIC: [[BB52]]: ; CHECK-NO-DYNAMIC-NEXT: store volatile i8 0, ptr [[TMP3]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP57:%.*]] = add i64 [[TMP10]], 4 ; CHECK-NO-DYNAMIC-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP57]] to ptr ; CHECK-NO-DYNAMIC-NEXT: store i8 -8, ptr [[TMP58]], align 1 -; CHECK-NO-DYNAMIC-NEXT: store i64 1172321806, ptr [[TMP4]], align 8 +; CHECK-NO-DYNAMIC-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP59:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NO-DYNAMIC-NEXT: [[TMP60:%.*]] = inttoptr i64 [[TMP59]] to ptr ; CHECK-NO-DYNAMIC-NEXT: store i64 0, ptr [[TMP60]], align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll index afa46e44..8b0940d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll @@ -16,9 +16,8 @@ entry: call void @llvm.dbg.declare(metadata ptr %i.addr, metadata !12, metadata !DIExpression()), !dbg !13 ; CHECK: %asan_local_stack_base = alloca i64 - ; CHECK: %[[ALLOCA:.*]] = ptrtoint ptr %MyAlloca to i64 - ; CHECK: %[[PHI:.*]] = phi i64 {{.*}} %[[ALLOCA]], - ; CHECK: store i64 %[[PHI]], ptr %asan_local_stack_base + ; CHECK: %[[PHI:.*]] = phi ptr {{.*}} %MyAlloca + ; CHECK: store ptr %[[PHI]], ptr %asan_local_stack_base ; CHECK: #dbg_declare(ptr %asan_local_stack_base, [[VAR_I:![0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32), [[LOC_I:![0-9]+]] %0 = load i32, ptr %i.addr, align 4, !dbg !14 %add = add nsw i32 %0, 2, !dbg !15 diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll index d85f217..7e14987 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll @@ -19,8 +19,7 @@ entry: ; CHECK-LABEL: foo ; CHECK: call i64 @__asan_stack_malloc ; CHECK: alloca i8, i64 {{.*}} align 64 -; CHECK: [[copyPtr:%[^ \t]+]] = inttoptr i64 %{{[^ \t]+}} to ptr -; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr]]{{[^%]+}} align 64 %a,{{[^,]+}}, +; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr:%[0-9]+]]{{[^%]+}} align 64 %a,{{[^,]+}}, ; CHECK: call i32 @bar(ptr [[copyPtr]]) ; CHECK: ret void @@ -38,8 +37,7 @@ entry: ; CHECK-LABEL: baz ; CHECK: call i64 @__asan_stack_malloc ; CHECK: alloca i8, i64 {{.*}} align 32 -; CHECK: [[copyPtr:%[^ \t]+]] = inttoptr i64 %{{[^ \t]+}} to ptr -; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr]]{{[^%]+}} align 4 %0,{{[^,]+}} +; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr:%[0-9]+]]{{[^%]+}} align 4 %0,{{[^,]+}} ; CHECK: call i32 @bar(ptr [[copyPtr]]) ; CHECK: ret void diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll index d56cd34..c8478c8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll @@ -19,15 +19,16 @@ entry: ; CHECK-RUNTIME: [[FAKE_STACK_BB:^[0-9]+]]: ; CHECK-RUNTIME: [[FAKE_STACK:%[0-9]+]] = phi i64 [ 0, %entry ], [ [[FAKE_STACK_RT]], %[[UAR_ENABLED_BB]] ] +; CHECK-RUNTIME: [[FAKE_STACK_PTR:%[0-9]+]] = inttoptr i64 [[FAKE_STACK]] to ptr +; CHECK-ALWAYS: [[FAKE_STACK_PTR:%[0-9]+]] = inttoptr i64 [[FAKE_STACK_RT]] to ptr ; CHECK-RUNTIME: icmp eq i64 [[FAKE_STACK]], 0 ; CHECK-ALWAYS: icmp eq i64 [[FAKE_STACK_RT]], 0 ; CHECK: [[NO_FAKE_STACK_BB:^[0-9]+]]: ; CHECK: %MyAlloca = alloca i8, i64 -; CHECK: [[ALLOCA:%[0-9]+]] = ptrtoint ptr %MyAlloca -; CHECK-RUNTIME: phi i64 [ [[FAKE_STACK]], %[[FAKE_STACK_BB]] ], [ [[ALLOCA]], %[[NO_FAKE_STACK_BB]] ] -; CHECK-ALWAYS: phi i64 [ [[FAKE_STACK_RT]], %entry ], [ [[ALLOCA]], %[[NO_FAKE_STACK_BB]] ] +; CHECK-RUNTIME: phi ptr [ [[FAKE_STACK_PTR]], %[[FAKE_STACK_BB]] ], [ %MyAlloca, %[[NO_FAKE_STACK_BB]] ] +; CHECK-ALWAYS: phi ptr [ [[FAKE_STACK_PTR]], %entry ], [ %MyAlloca, %[[NO_FAKE_STACK_BB]] ] ; CHECK: ret void diff --git a/llvm/test/MC/AArch64/data-directive-specifier.s b/llvm/test/MC/AArch64/data-directive-specifier.s index 2cb7eb3..2d1ec4f 100644 --- a/llvm/test/MC/AArch64/data-directive-specifier.s +++ b/llvm/test/MC/AArch64/data-directive-specifier.s @@ -12,6 +12,7 @@ l: # CHECK-NEXT: 0x8 R_AARCH64_PLT32 extern 0x4 # CHECK-NEXT: 0xC R_AARCH64_PLT32 g 0x8 # CHECK-NEXT: 0x10 R_AARCH64_PLT32 g 0x18 +# CHECK-NEXT: 0x14 R_AARCH64_FUNCINIT64 .text 0x0 # CHECK-NEXT: } .data .word l@plt - . @@ -21,6 +22,8 @@ l: .word g@plt - . 
+ 8 .word g@plt - .data + 8 +.quad l@funcinit + # CHECK: Section ({{.*}}) .rela.data1 { # CHECK-NEXT: 0x0 R_AARCH64_GOTPCREL32 data1 0x0 # CHECK-NEXT: 0x4 R_AARCH64_GOTPCREL32 extern 0x4 diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll index fdab67a..afc98ce 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg -verify-dom-info=1 %s | FileCheck %s ; This test checks that a constant propagation is applied for a basic loop. ; Related to bug 44679. diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index f45798b..5076517 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s -; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE ; This test checks that the analysis identifies all threadable paths in a ; simple CFG. A threadable path includes a list of basic blocks, the exit diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll index 092c854..426b51e 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s ; These tests check that the DFA jump threading transformation is applied ; properly to two CFGs. 
It checks that blocks are cloned, branches are updated, @@ -445,9 +445,67 @@ bb2: ; preds = %select.unfold unreachable } + +define i16 @DTU_update_crash() { +; CHECK-LABEL: @DTU_update_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY_SELECTBLOCK:%.*]] +; CHECK: for.body.selectblock: +; CHECK-NEXT: br i1 false, label [[SWITCHBLOCK_JT0:%.*]], label [[SEL_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: sel.si.unfold.false: +; CHECK-NEXT: br label [[SWITCHBLOCK:%.*]] +; CHECK: sel.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ] +; CHECK-NEXT: br label [[SWITCHBLOCK_JT0]] +; CHECK: switchblock: +; CHECK-NEXT: [[SWITCHBLOCK_PHI:%.*]] = phi i32 [ poison, [[SEL_SI_UNFOLD_FALSE:%.*]] ] +; CHECK-NEXT: [[P_24_ADDR_3:%.*]] = phi i32 [ 0, [[SEL_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: switch i32 [[SWITCHBLOCK_PHI]], label [[CLEANUP:%.*]] [ +; CHECK-NEXT: i32 0, label [[FOR_INC:%.*]] +; CHECK-NEXT: i32 1, label [[CLEANUP]] +; CHECK-NEXT: i32 5, label [[FOR_BODY_SELECTBLOCK]] +; CHECK-NEXT: ] +; CHECK: switchblock.jt0: +; CHECK-NEXT: [[SWITCHBLOCK_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SEL_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[P_24_ADDR_3_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ 0, [[SEL_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 false, label [[FOR_BODY_SELECTBLOCK]], label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[P_24_ADDR_3_JT0]]) +; CHECK-NEXT: ret i16 0 +; +entry: + br label %for.body.selectblock + +for.body.selectblock: ; preds = %for.inc, %switchblock, %entry + %sel = select i1 false, i32 0, i32 0 + br label %switchblock + +switchblock: ; preds = %for.body.selectblock + %switchblock.phi = phi i32 [ %sel, %for.body.selectblock ] + %p_24.addr.3 = phi i32 [ 0, %for.body.selectblock ] + switch i32 %switchblock.phi, label %cleanup [ + i32 0, label %for.inc + i32 1, label %cleanup + i32 5, label %for.body.selectblock + ] + +for.inc: ; preds = %switchblock + br i1 false, label %for.body.selectblock, label %cleanup + +cleanup: ; preds = %for.inc, %switchblock, %switchblock + call void (...) @llvm.fake.use(i32 %p_24.addr.3) + ret i16 0 +} + +declare void @llvm.fake.use(...) + !0 = !{!"function_entry_count", i32 10} !1 = !{!"branch_weights", i32 3, i32 5} ;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;. ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} ;. diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll index de38752..95d3ffa 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false -verify-dom-info=1 %s | FileCheck %s ; These tests check if selects are unfolded properly for jump threading ; opportunities. 
There are three different patterns to consider: diff --git a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll index 4555dfb..71a469d 100644 --- a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll +++ b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s declare void @do_something() declare void @user(i32) diff --git a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll index 00500a7..cc117e7 100644 --- a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll +++ b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s define void @pr60254() { ; CHECK-LABEL: define void @pr60254() { diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll index 89b132e..9371fe2 100644 --- a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll +++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll @@ -18,7 +18,7 @@ define i64 @test_ptr_compare_guard(ptr %start, ptr %end) { ; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[EXIT_LOOPEXIT:.*]] ; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 -; CHECK-NEXT: [[I64_IV_NEXT]] = add i64 [[I64_IV]], 1 +; CHECK-NEXT: [[I64_IV_NEXT]] = add nuw i64 [[I64_IV]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT_LOOPEXIT]]: diff --git a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll index ba34930..bc988a9 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i1 @a_true_implies_b_true(i8 %z, i1 %X, i1 %Y) { @@ -34,15 +34,15 @@ define <2 x i1> @a_true_implies_b_true_vec(i8 %z0, <2 x i1> %X, <2 x i1> %Y) { ret <2 x i1> %res } -define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) { +define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) !prof !0 { ; CHECK-LABEL: @a_true_implies_b_true2( ; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 [[Z:%.*]], 20 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i1 [[RES]] ; %a = icmp ugt i8 %z, 20 %b = icmp ugt i8 %z, 10 - %sel = select i1 %b, i1 %X, i1 %Y + %sel = select i1 %b, i1 %X, i1 %Y, !prof !1 %res = and i1 %a, %sel ret i1 %res } @@ -258,3 +258,10 @@ define i1 @neg_icmp_eq_implies_trunc(i8 %x, i1 %c) { %sel2 = select i1 %cmp, i1 true, i1 %sel1 ret i1 
%sel2 } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +;. diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll index 5e7c636..a3483af 100644 --- a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -158,6 +158,26 @@ define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) { ret ptr addrspace(1) %r } +define ptr @ptrmask_simplify_ptrtoaddr(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_ptrtoaddr +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %m = ptrtoaddr ptr %p to i64 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32 +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %m = ptrtoaddr ptr addrspace(1) %p to i32 + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m) + ret ptr addrspace(1) %r +} + define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused ; CHECK-SAME: (ptr align 64 [[P:%.*]]) { diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll index fa55eab..bc63f79 100644 --- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -465,8 +465,7 @@ define void @peel_int_eq_condition(i32 %start) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[IV]], [[START]] -; CHECK-NEXT: br i1 [[C_0]], label [[IF_THEN:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[LOOP_LATCH]] ; CHECK: if.then: ; CHECK-NEXT: call void @fn(i32 [[IV]]) ; CHECK-NEXT: br label [[LOOP_LATCH]] diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll index 616617b..5d5a610 100644 --- a/llvm/test/Transforms/SROA/phi-and-select.ll +++ b/llvm/test/Transforms/SROA/phi-and-select.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -36,11 +36,11 @@ exit: ret i32 %result } -define i32 @test2() { +define i32 @test2() !prof !0 { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1 -; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0 +; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i32 [[RESULT_SROA_SPECULATED]] ; entry: @@ -53,7 +53,7 @@ entry: %v1 = load i32, ptr %a1 %cond = icmp sle i32 %v0, %v1 - %select = select i1 %cond, ptr %a1, ptr %a + %select = select i1 
%cond, ptr %a1, ptr %a, !prof !1 %result = load i32, ptr %select ret i32 %result @@ -870,3 +870,17 @@ define i8 @volatile_select(ptr %p, i1 %b) { %v2 = load i8, ptr %px ret i8 %v2 } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5} +;. +; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address } +;. +; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address } +;. +; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +;. +; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-MODIFY-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +;. diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll index 776624c..45c3bbd 100644 --- a/llvm/test/Transforms/SROA/phi-gep.ll +++ b/llvm/test/Transforms/SROA/phi-gep.ll @@ -1,9 +1,12 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG %pair = type { i32, i32 } +;. +; CHECK: @g = global %pair zeroinitializer, align 4 +;. define i32 @test_sroa_phi_gep(i1 %cond) { ; CHECK-LABEL: @test_sroa_phi_gep( ; CHECK-NEXT: entry: @@ -334,18 +337,18 @@ exit: unreachable } -define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) { +define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) !prof !0 { ; CHECK-LABEL: @test_sroa_gep_phi_select_same_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[PAIR:%.*]], align 8 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]] +; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i64 1 ; CHECK-NEXT: [[PTR_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PTR]], i64 1 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]] -; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]] +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]], !prof [[PROF2:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -355,9 +358,9 @@ entry: while.body: %phi = phi ptr [ %alloca, %entry ], [ %select, %while.body ] - %select = select i1 %c1, ptr %phi, ptr %ptr + %select = select i1 %c1, ptr %phi, ptr %ptr, !prof !1 %gep = getelementptr inbounds %pair, ptr %select, i64 1 - br i1 %c2, label %exit, label %while.body + br i1 %c2, label %exit, label %while.body, !prof !2 exit: ret void @@ -747,6 +750,18 @@ declare ptr @foo() declare i32 @__gxx_personality_v0(...) declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5} +!2 = !{!"branch_weights", i32 7, i32 11} + +;. 
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 7, i32 11} +;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} ; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll index b48b0f7..a701d78 100644 --- a/llvm/test/Transforms/SROA/select-gep.ll +++ b/llvm/test/Transforms/SROA/select-gep.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG @@ -203,10 +203,10 @@ define i32 @test_select_idx_mem2reg(i1 %c) { ; Test gep with a select-like zext index unfolding on an alloca that is ; splittable and promotable. -define i64 @test_select_like_zext_idx_mem2reg(i1 %c) { +define i64 @test_select_like_zext_idx_mem2reg(i1 %c) !prof !0 { ; CHECK-LABEL: @test_select_like_zext_idx_mem2reg( ; CHECK-NEXT: [[IDX:%.*]] = zext i1 [[C:%.*]] to i64 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i64 [[RES]] ; %alloca = alloca [2 x i64], align 8 @@ -352,3 +352,16 @@ define i32 @test_select_idx_not_constant3(i1 %c, ptr %p, i64 %arg) { %res = load i32, ptr %gep, align 4 ret i32 %res } + +!0 = !{!"function_entry_count", i32 10} +;. +; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +;. +; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +;. +; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"unknown", !"sroa"} +;. +; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-MODIFY-CFG: [[PROF1]] = !{!"unknown", !"sroa"} +;. 
diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml new file mode 100644 index 0000000..e748eecf --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml @@ -0,0 +1,27 @@ +# RUN: yaml2obj %s -o %t.dxbc +# RUN: not llvm-objcopy --dump-section=FKE0=%t.fek0 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-ZEROSIZE -DFILE=%t.fek0 +# RUN: not llvm-objcopy --dump-section=FKE3=%t.fek1 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING -DFILE=%t.fek1 +# RUN: not llvm-objcopy --dump-section=FKE2=%t/does_not_exist/.fek2 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-BAD-PATH -DFILE=%t/does_not_exist/.fek2 -DMSG=%errc_ENOENT + +# CHECK-ZEROSIZE: error: '[[FILE]]': part 'FKE0' is empty +# CHECK-MISSING: error: '[[FILE]]': part 'FKE3' not found +# CHECK-BAD-PATH: error: '[[FILE]]': [[MSG]] + +--- !dxcontainer +Header: + Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + Version: + Major: 1 + Minor: 0 + FileSize: 108 + PartCount: 3 + PartOffsets: [ 60, 68, 76 ] +Parts: + - Name: FKE0 + Size: 0 + - Name: FKE1 + Size: 0 + - Name: FKE2 + Size: 8 +... diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml new file mode 100644 index 0000000..7d80a2c --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml @@ -0,0 +1,278 @@ +# RUN: yaml2obj %s -o %t.dxbc +# RUN: llvm-objcopy --dump-section=DXIL=%t.bc %t.dxbc +# RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=BITCODE +# RUN: wc -c %t.bc | FileCheck %s --check-prefix=DXIL-SIZE + +## Verify that when dumping the DXIL part we get a valid bitcode file. +# BITCODE: define void @main() +## Verify the size of the bitcode data. +# DXIL-SIZE: 1708 + +## Dump the PSV0 part and verify its size. +# RUN: llvm-objcopy --dump-section=PSV0=%t.psv0 %t.dxbc +# RUN: wc -c %t.psv0 | FileCheck %s --check-prefix=PSV0-SIZE +# RUN: od -v -Ax -t x1 %t.psv0 | FileCheck %s --check-prefix=PSV0-CONTENTS +# PSV0-SIZE: 76 + +# For a compute shader the structure size is encoded followed by a bunch of 00'd +# bytes until you get to the unused wave size min and max (0xffff), followed by +# the shader stage (5 for compute). +# TODO: Update this test to use objdump or obj2yaml once we support +# --add-section in objcopy. See issue: +# https://github.com/llvm/llvm-project/issues/162159. 
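+# As an illustrative decode of the two check lines below (field offsets
+# inferred from the PSV0 version 3 layout, not checked by this test):
+# bytes 0x00-0x03 hold the runtime info size (0x34 = 52), 0x14-0x17
+# MinimumWaveLaneCount (0), 0x18-0x1b MaximumWaveLaneCount (0xffffffff,
+# the unused sentinel), and 0x1c the shader stage (5 = compute), matching
+# the PSVInfo fields in the YAML below.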
+# PSV0-CONTENTS: 0000 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# PSV0-CONTENTS: 0010 00 00 00 00 00 00 00 00 ff ff ff ff 05 00 00 00 + +--- !dxcontainer +Header: + Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + Version: + Major: 1 + Minor: 0 + FileSize: 1872 + PartCount: 2 + PartOffsets: [ 40, 1780 ] +Parts: + - Name: DXIL + Size: 1732 + Program: + MajorVersion: 6 + MinorVersion: 0 + ShaderKind: 5 + Size: 433 + DXILMajorVersion: 1 + DXILMinorVersion: 0 + DXILSize: 1708 + DXIL: [ 0x42, 0x43, 0xC0, 0xDE, 0x21, 0xC, 0x0, 0x0, 0xA8, + 0x1, 0x0, 0x0, 0xB, 0x82, 0x20, 0x0, 0x2, 0x0, + 0x0, 0x0, 0x13, 0x0, 0x0, 0x0, 0x7, 0x81, 0x23, + 0x91, 0x41, 0xC8, 0x4, 0x49, 0x6, 0x10, 0x32, + 0x39, 0x92, 0x1, 0x84, 0xC, 0x25, 0x5, 0x8, 0x19, + 0x1E, 0x4, 0x8B, 0x62, 0x80, 0x10, 0x45, 0x2, + 0x42, 0x92, 0xB, 0x42, 0x84, 0x10, 0x32, 0x14, + 0x38, 0x8, 0x18, 0x4B, 0xA, 0x32, 0x42, 0x88, + 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xA5, + 0x0, 0x19, 0x32, 0x42, 0xE4, 0x48, 0xE, 0x90, + 0x11, 0x22, 0xC4, 0x50, 0x41, 0x51, 0x81, 0x8C, + 0xE1, 0x83, 0xE5, 0x8A, 0x4, 0x21, 0x46, 0x6, + 0x51, 0x18, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x1B, + 0x90, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xC0, + 0x1, 0x24, 0x80, 0x2, 0x0, 0x0, 0x0, 0x49, 0x18, + 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x13, 0x82, 0x0, + 0x0, 0x89, 0x20, 0x0, 0x0, 0x11, 0x0, 0x0, 0x0, + 0x32, 0x22, 0x8, 0x9, 0x20, 0x64, 0x85, 0x4, 0x13, + 0x22, 0xA4, 0x84, 0x4, 0x13, 0x22, 0xE3, 0x84, + 0xA1, 0x90, 0x14, 0x12, 0x4C, 0x88, 0x8C, 0xB, + 0x84, 0x84, 0x4C, 0x10, 0x20, 0x73, 0x4, 0x8, + 0xC1, 0x65, 0xC3, 0x85, 0x2C, 0xE8, 0x3, 0x40, + 0x14, 0x91, 0x4E, 0xD1, 0x4A, 0x48, 0x44, 0x54, + 0x11, 0xC3, 0x9, 0x30, 0xC4, 0x18, 0x1, 0x30, + 0x2, 0x50, 0x82, 0x21, 0x1A, 0x8, 0x98, 0x23, + 0x0, 0x3, 0x0, 0x13, 0x14, 0x72, 0xC0, 0x87, 0x74, + 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x3, + 0x72, 0xC0, 0x87, 0xD, 0xAE, 0x50, 0xE, 0x6D, + 0xD0, 0xE, 0x7A, 0x50, 0xE, 0x6D, 0x0, 0xF, 0x7A, + 0x30, 0x7, 0x72, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, + 0x90, 0xE, 0x71, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, + 0x90, 0xE, 0x78, 0xA0, 0x7, 0x78, 0xD0, 0x6, 0xE9, + 0x10, 0x7, 0x76, 0xA0, 0x7, 0x71, 0x60, 0x7, 0x6D, + 0x90, 0xE, 0x73, 0x20, 0x7, 0x7A, 0x30, 0x7, 0x72, + 0xD0, 0x6, 0xE9, 0x60, 0x7, 0x74, 0xA0, 0x7, 0x76, + 0x40, 0x7, 0x6D, 0x60, 0xE, 0x71, 0x60, 0x7, 0x7A, + 0x10, 0x7, 0x76, 0xD0, 0x6, 0xE6, 0x30, 0x7, 0x72, + 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, 0x60, 0xE, 0x76, + 0x40, 0x7, 0x7A, 0x60, 0x7, 0x74, 0xD0, 0x6, 0xEE, + 0x80, 0x7, 0x7A, 0x10, 0x7, 0x76, 0xA0, 0x7, 0x73, + 0x20, 0x7, 0x7A, 0x60, 0x7, 0x74, 0x30, 0xE4, + 0x21, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x2, 0x0, + 0x0, 0x0, 0x20, 0xB, 0x4, 0x7, 0x0, 0x0, 0x0, + 0x32, 0x1E, 0x98, 0xC, 0x19, 0x11, 0x4C, 0x90, + 0x8C, 0x9, 0x26, 0x47, 0xC6, 0x4, 0x43, 0xBA, + 0x12, 0x28, 0x88, 0x62, 0x18, 0x1, 0x28, 0x84, + 0x22, 0x0, 0x0, 0x0, 0x79, 0x18, 0x0, 0x0, 0xE5, + 0x0, 0x0, 0x0, 0x33, 0x8, 0x80, 0x1C, 0xC4, 0xE1, + 0x1C, 0x66, 0x14, 0x1, 0x3D, 0x88, 0x43, 0x38, + 0x84, 0xC3, 0x8C, 0x42, 0x80, 0x7, 0x79, 0x78, + 0x7, 0x73, 0x98, 0x71, 0xC, 0xE6, 0x0, 0xF, 0xED, + 0x10, 0xE, 0xF4, 0x80, 0xE, 0x33, 0xC, 0x42, 0x1E, + 0xC2, 0xC1, 0x1D, 0xCE, 0xA1, 0x1C, 0x66, 0x30, + 0x5, 0x3D, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1B, + 0xCC, 0x3, 0x3D, 0xC8, 0x43, 0x3D, 0x8C, 0x3, + 0x3D, 0xCC, 0x78, 0x8C, 0x74, 0x70, 0x7, 0x7B, + 0x8, 0x7, 0x79, 0x48, 0x87, 0x70, 0x70, 0x7, 0x7A, + 0x70, 0x3, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, + 0x19, 0xCC, 0x11, 0xE, 0xEC, 0x90, 0xE, 0xE1, + 0x30, 0xF, 0x6E, 0x30, 0xF, 0xE3, 0xF0, 0xE, 0xF0, + 
0x50, 0xE, 0x33, 0x10, 0xC4, 0x1D, 0xDE, 0x21, + 0x1C, 0xD8, 0x21, 0x1D, 0xC2, 0x61, 0x1E, 0x66, + 0x30, 0x89, 0x3B, 0xBC, 0x83, 0x3B, 0xD0, 0x43, + 0x39, 0xB4, 0x3, 0x3C, 0xBC, 0x83, 0x3C, 0x84, + 0x3, 0x3B, 0xCC, 0xF0, 0x14, 0x76, 0x60, 0x7, + 0x7B, 0x68, 0x7, 0x37, 0x68, 0x87, 0x72, 0x68, + 0x7, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, + 0x60, 0x7, 0x76, 0x28, 0x7, 0x76, 0xF8, 0x5, 0x76, + 0x78, 0x87, 0x77, 0x80, 0x87, 0x5F, 0x8, 0x87, + 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, + 0x81, 0x2C, 0xEE, 0xF0, 0xE, 0xEE, 0xE0, 0xE, + 0xF5, 0xC0, 0xE, 0xEC, 0x30, 0x3, 0x62, 0xC8, + 0xA1, 0x1C, 0xE4, 0xA1, 0x1C, 0xCC, 0xA1, 0x1C, + 0xE4, 0xA1, 0x1C, 0xDC, 0x61, 0x1C, 0xCA, 0x21, + 0x1C, 0xC4, 0x81, 0x1D, 0xCA, 0x61, 0x6, 0xD6, + 0x90, 0x43, 0x39, 0xC8, 0x43, 0x39, 0x98, 0x43, + 0x39, 0xC8, 0x43, 0x39, 0xB8, 0xC3, 0x38, 0x94, + 0x43, 0x38, 0x88, 0x3, 0x3B, 0x94, 0xC3, 0x2F, + 0xBC, 0x83, 0x3C, 0xFC, 0x82, 0x3B, 0xD4, 0x3, + 0x3B, 0xB0, 0xC3, 0xC, 0xC7, 0x69, 0x87, 0x70, + 0x58, 0x87, 0x72, 0x70, 0x83, 0x74, 0x68, 0x7, + 0x78, 0x60, 0x87, 0x74, 0x18, 0x87, 0x74, 0xA0, + 0x87, 0x19, 0xCE, 0x53, 0xF, 0xEE, 0x0, 0xF, 0xF2, + 0x50, 0xE, 0xE4, 0x90, 0xE, 0xE3, 0x40, 0xF, 0xE1, + 0x20, 0xE, 0xEC, 0x50, 0xE, 0x33, 0x20, 0x28, + 0x1D, 0xDC, 0xC1, 0x1E, 0xC2, 0x41, 0x1E, 0xD2, + 0x21, 0x1C, 0xDC, 0x81, 0x1E, 0xDC, 0xE0, 0x1C, + 0xE4, 0xE1, 0x1D, 0xEA, 0x1, 0x1E, 0x66, 0x18, + 0x51, 0x38, 0xB0, 0x43, 0x3A, 0x9C, 0x83, 0x3B, + 0xCC, 0x50, 0x24, 0x76, 0x60, 0x7, 0x7B, 0x68, + 0x7, 0x37, 0x60, 0x87, 0x77, 0x78, 0x7, 0x78, + 0x98, 0x51, 0x4C, 0xF4, 0x90, 0xF, 0xF0, 0x50, + 0xE, 0x33, 0x1E, 0x6A, 0x1E, 0xCA, 0x61, 0x1C, + 0xE8, 0x21, 0x1D, 0xDE, 0xC1, 0x1D, 0x7E, 0x1, + 0x1E, 0xE4, 0xA1, 0x1C, 0xCC, 0x21, 0x1D, 0xF0, + 0x61, 0x6, 0x54, 0x85, 0x83, 0x38, 0xCC, 0xC3, + 0x3B, 0xB0, 0x43, 0x3D, 0xD0, 0x43, 0x39, 0xFC, + 0xC2, 0x3C, 0xE4, 0x43, 0x3B, 0x88, 0xC3, 0x3B, + 0xB0, 0xC3, 0x8C, 0xC5, 0xA, 0x87, 0x79, 0x98, + 0x87, 0x77, 0x18, 0x87, 0x74, 0x8, 0x7, 0x7A, + 0x28, 0x7, 0x72, 0x98, 0x81, 0x5C, 0xE3, 0x10, + 0xE, 0xEC, 0xC0, 0xE, 0xE5, 0x50, 0xE, 0xF3, 0x30, + 0x23, 0xC1, 0xD2, 0x41, 0x1E, 0xE4, 0xE1, 0x17, + 0xD8, 0xE1, 0x1D, 0xDE, 0x1, 0x1E, 0x66, 0x48, + 0x19, 0x3B, 0xB0, 0x83, 0x3D, 0xB4, 0x83, 0x1B, + 0x84, 0xC3, 0x38, 0x8C, 0x43, 0x39, 0xCC, 0xC3, + 0x3C, 0xB8, 0xC1, 0x39, 0xC8, 0xC3, 0x3B, 0xD4, + 0x3, 0x3C, 0xCC, 0x48, 0xB4, 0x71, 0x8, 0x7, 0x76, + 0x60, 0x7, 0x71, 0x8, 0x87, 0x71, 0x58, 0x87, + 0x19, 0xDB, 0xC6, 0xE, 0xEC, 0x60, 0xF, 0xED, + 0xE0, 0x6, 0xF0, 0x20, 0xF, 0xE5, 0x30, 0xF, 0xE5, + 0x20, 0xF, 0xF6, 0x50, 0xE, 0x6E, 0x10, 0xE, 0xE3, + 0x30, 0xE, 0xE5, 0x30, 0xF, 0xF3, 0xE0, 0x6, 0xE9, + 0xE0, 0xE, 0xE4, 0x50, 0xE, 0xF8, 0x30, 0x23, + 0xE2, 0xEC, 0x61, 0x1C, 0xC2, 0x81, 0x1D, 0xD8, + 0xE1, 0x17, 0xEC, 0x21, 0x1D, 0xE6, 0x21, 0x1D, + 0xC4, 0x21, 0x1D, 0xD8, 0x21, 0x1D, 0xE8, 0x21, + 0x1F, 0x66, 0x20, 0x9D, 0x3B, 0xBC, 0x43, 0x3D, + 0xB8, 0x3, 0x39, 0x94, 0x83, 0x39, 0xCC, 0x58, + 0xBC, 0x70, 0x70, 0x7, 0x77, 0x78, 0x7, 0x7A, + 0x8, 0x7, 0x7A, 0x48, 0x87, 0x77, 0x70, 0x87, + 0x19, 0xCB, 0xE7, 0xE, 0xEF, 0x30, 0xF, 0xE1, + 0xE0, 0xE, 0xE9, 0x40, 0xF, 0xE9, 0xA0, 0xF, 0xE5, + 0x30, 0xC3, 0x1, 0x3, 0x73, 0xA8, 0x7, 0x77, 0x18, + 0x87, 0x5F, 0x98, 0x87, 0x70, 0x70, 0x87, 0x74, + 0xA0, 0x87, 0x74, 0xD0, 0x87, 0x72, 0x98, 0x81, + 0x84, 0x41, 0x39, 0xE0, 0xC3, 0x38, 0xB0, 0x43, + 0x3D, 0x90, 0x43, 0x39, 0xCC, 0x40, 0xC4, 0xA0, + 0x1D, 0xCA, 0xA1, 0x1D, 0xE0, 0x41, 0x1E, 0xDE, + 0xC1, 0x1C, 0x66, 0x24, 0x63, 0x30, 0xE, 0xE1, + 0xC0, 0xE, 0xEC, 0x30, 0xF, 0xE9, 0x40, 0xF, 0xE5, + 0x30, 0x43, 
0x21, 0x83, 0x75, 0x18, 0x7, 0x73, + 0x48, 0x87, 0x5F, 0xA0, 0x87, 0x7C, 0x80, 0x87, + 0x72, 0x98, 0xB1, 0x94, 0x1, 0x3C, 0x8C, 0xC3, + 0x3C, 0x94, 0xC3, 0x38, 0xD0, 0x43, 0x3A, 0xBC, + 0x83, 0x3B, 0xCC, 0xC3, 0x8C, 0xC5, 0xC, 0x48, + 0x21, 0x15, 0x42, 0x61, 0x1E, 0xE6, 0x21, 0x1D, + 0xCE, 0xC1, 0x1D, 0x52, 0x81, 0x14, 0x66, 0x4C, + 0x67, 0x30, 0xE, 0xEF, 0x20, 0xF, 0xEF, 0xE0, + 0x6, 0xEF, 0x50, 0xF, 0xF4, 0x30, 0xF, 0xE9, 0x40, + 0xE, 0xE5, 0xE0, 0x6, 0xE6, 0x20, 0xF, 0xE1, 0xD0, + 0xE, 0xE5, 0x30, 0xA3, 0x40, 0x83, 0x76, 0x68, + 0x7, 0x79, 0x8, 0x87, 0x19, 0x52, 0x1A, 0xB8, + 0xC3, 0x3B, 0x84, 0x3, 0x3B, 0xA4, 0x43, 0x38, + 0xCC, 0x83, 0x1B, 0x84, 0x3, 0x39, 0x90, 0x83, + 0x3C, 0xCC, 0x3, 0x3C, 0x84, 0xC3, 0x38, 0x94, + 0xC3, 0xC, 0x46, 0xD, 0xC6, 0x21, 0x1C, 0xD8, + 0x81, 0x1D, 0xCA, 0xA1, 0x1C, 0x7E, 0x81, 0x1E, + 0xF2, 0x1, 0x1E, 0xCA, 0x61, 0xC6, 0xB1, 0x6, + 0xEE, 0xF0, 0xE, 0xE6, 0x20, 0xF, 0xE5, 0x50, + 0xE, 0x33, 0x1C, 0x36, 0x20, 0x7, 0x7C, 0x70, + 0x3, 0x77, 0x78, 0x7, 0x77, 0xA8, 0x7, 0x77, 0x48, + 0x7, 0x73, 0x78, 0x7, 0x79, 0x68, 0x87, 0x19, + 0x55, 0x1B, 0x90, 0x3, 0x3E, 0xB8, 0xC1, 0x38, + 0xBC, 0x83, 0x3B, 0xD0, 0x83, 0x3C, 0xBC, 0x3, + 0x3B, 0x98, 0x3, 0x3B, 0xBC, 0xC3, 0x3D, 0xB8, + 0x1, 0x3A, 0xA4, 0x83, 0x3B, 0xD0, 0xC3, 0x3C, + 0xCC, 0x58, 0xDC, 0x80, 0x1C, 0xF0, 0xC1, 0xD, + 0xE0, 0x41, 0x1E, 0xCA, 0x61, 0x1C, 0xD2, 0x61, + 0x1E, 0xCA, 0x1, 0x0, 0x79, 0x28, 0x0, 0x0, 0x52, + 0x0, 0x0, 0x0, 0xC2, 0x3C, 0x90, 0x40, 0x86, 0x10, + 0x19, 0x32, 0xE2, 0x64, 0x90, 0x40, 0x46, 0x2, + 0x19, 0x23, 0x23, 0x46, 0x2, 0x13, 0x24, 0xC6, + 0x0, 0x13, 0x74, 0xCE, 0x61, 0x8C, 0x2D, 0xCC, + 0xED, 0xC, 0xC4, 0xAE, 0x4C, 0x6E, 0x2E, 0xED, + 0xCD, 0xD, 0x44, 0x46, 0xC6, 0x5, 0xC6, 0x5, 0xE6, + 0x2C, 0x8D, 0xE, 0x4, 0xE5, 0x2C, 0x8D, 0xE, 0xE8, + 0x2C, 0x8D, 0xE, 0xAD, 0x4E, 0xCC, 0x65, 0xEC, + 0xAD, 0x4D, 0x87, 0x8D, 0xCD, 0xAE, 0xED, 0x85, + 0x8D, 0xCD, 0xAE, 0xAD, 0x5, 0x4E, 0xEE, 0x4D, + 0xAD, 0x6C, 0x8C, 0xCE, 0xE5, 0x2C, 0x8D, 0xE, + 0xA4, 0xEC, 0xC6, 0x86, 0xA6, 0x2C, 0x26, 0x7, + 0xA6, 0xAC, 0xC, 0x26, 0x26, 0xE7, 0x46, 0x6C, + 0x2C, 0xA6, 0xC, 0x66, 0xA6, 0x6C, 0xC6, 0x4C, + 0x6, 0x86, 0x6C, 0x4C, 0x6, 0x46, 0xCC, 0x66, + 0x6C, 0x2C, 0xC, 0x27, 0x46, 0x6C, 0x86, 0x6C, + 0x2C, 0xE5, 0x8, 0x63, 0x73, 0x87, 0x68, 0xB, + 0x4B, 0x73, 0x3B, 0xCA, 0xDD, 0x18, 0x5A, 0x98, + 0xDC, 0xD7, 0x5C, 0x9A, 0x5E, 0xD9, 0x69, 0xCC, + 0xE4, 0xC2, 0xDA, 0xCA, 0x5A, 0xE0, 0xDE, 0xD2, + 0xDC, 0xE8, 0xCA, 0xE4, 0x86, 0x20, 0x1C, 0xC1, + 0x10, 0x84, 0x43, 0x18, 0x82, 0x70, 0xC, 0x43, + 0x10, 0xE, 0x62, 0x8, 0x42, 0x1, 0xC, 0x41, 0x38, + 0x8A, 0x21, 0x8, 0x87, 0x31, 0x6, 0xC1, 0x38, + 0xC6, 0x10, 0x4, 0x63, 0x18, 0x4, 0x24, 0x19, + 0x83, 0x60, 0x24, 0x63, 0x18, 0xC, 0xC3, 0x18, + 0x83, 0xB0, 0x44, 0x63, 0x28, 0x94, 0x1, 0x0, + 0xA4, 0x31, 0xC, 0x6, 0xB1, 0x8C, 0x61, 0x60, + 0xA, 0xC6, 0x24, 0x64, 0x78, 0x2E, 0x76, 0x61, + 0x6C, 0x76, 0x65, 0x72, 0x43, 0x9, 0x18, 0xA3, + 0xB0, 0xB1, 0xD9, 0xB5, 0xB9, 0xA4, 0x91, 0x95, + 0xB9, 0xD1, 0xD, 0x25, 0x68, 0x8C, 0x43, 0x86, + 0xE7, 0x32, 0x87, 0x16, 0x46, 0x56, 0x26, 0xD7, + 0xF4, 0x46, 0x56, 0xC6, 0x36, 0x94, 0xC0, 0x31, + 0xA, 0x19, 0x9E, 0x8B, 0x5D, 0x99, 0xDC, 0x5C, + 0xDA, 0x9B, 0xDB, 0x50, 0x82, 0xC7, 0x38, 0x64, + 0x78, 0x2E, 0x65, 0x6E, 0x74, 0x72, 0x79, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x73, 0x43, 0x9, 0x24, + 0x13, 0xB1, 0xB1, 0xD9, 0xB5, 0xB9, 0xB4, 0xBD, + 0x91, 0xD5, 0xB1, 0x95, 0xB9, 0x98, 0xB1, 0x85, + 0x9D, 0xCD, 0xD, 0x45, 0x98, 0x28, 0x0, 0x71, + 0x20, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x6, 0x40, + 0x30, 0x0, 0xD2, 0x0, 0x0, 0x0, 0x61, 0x20, 0x0, + 
0x0, 0x6, 0x0, 0x0, 0x0, 0x13, 0x4, 0x1, 0x86, + 0x3, 0x1, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x7, 0x50, + 0x10, 0xCD, 0x14, 0x61, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0 ] + - Name: PSV0 + Size: 76 + PSVInfo: + Version: 3 + ShaderStage: 5 + MinimumWaveLaneCount: 0 + MaximumWaveLaneCount: 4294967295 + UsesViewID: 0 + SigInputVectors: 0 + SigOutputVectors: [ 0, 0, 0, 0 ] + NumThreadsX: 1 + NumThreadsY: 1 + NumThreadsZ: 1 + EntryName: main + ResourceStride: 24 + Resources: [] + SigInputElements: [] + SigOutputElements: [] + SigPatchOrPrimElements: [] + InputOutputMap: + - [ ] + - [ ] + - [ ] + - [ ] +... diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test index fd1492f..bcffd40 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test @@ -34,6 +34,7 @@ # CHECK-NEXT: { # CHECK-NEXT: ID: 0 # CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Hash: 0x0 # CHECK-NEXT: Size: 0x1 # CHECK-NEXT: HasReturn: No # CHECK-NEXT: HasTailCall: Yes @@ -50,6 +51,7 @@ # CHECK-NEXT: ID: 2 # CHECK-NEXT: Offset: 0x3 # CHECK-NEXT: Callsite End Offsets: [1, 3] +# CHECK-NEXT: Hash: 0x123 # CHECK-NEXT: Size: 0x7 # CHECK-NEXT: HasReturn: Yes # CHECK-NEXT: HasTailCall: No @@ -144,8 +146,8 @@ Sections: ShSize: [[SIZE=<none>]] Link: .text Entries: - - Version: 3 - Feature: 0x28 + - Version: 4 + Feature: 0x68 BBRanges: - BaseAddress: [[ADDR=0x11111]] BBEntries: @@ -160,6 +162,7 @@ Sections: Size: 0x4 Metadata: 0x15 CallsiteEndOffsets: [ 0x1 , 0x2 ] + Hash: 0x123 - Version: 2 BBRanges: - BaseAddress: 0x22222 diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml index dc14025..7a22efe 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml @@ -162,6 +162,92 @@ Sections: BBRanges: - BaseAddress: 0x20 +## Check that obj2yaml can dump basic block hash in the .llvm_bb_addr_map section. 
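+## (Feature values follow the bit encoding exercised by ELFTypesTest in this
+## patch: 0x40 enables per-block hashes, 0x8 multiple BB ranges, and 0x20
+## callsite end offsets; 0x68 in the second entry below combines all three.)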
+ +# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=BBHASH + +# BBHASH: --- !ELF +# BBHASH-NEXT: FileHeader: +# BBHASH-NEXT: Class: ELFCLASS64 +# BBHASH-NEXT: Data: ELFDATA2LSB +# BBHASH-NEXT: Type: ET_EXEC +# BBHASH-NEXT: Sections: +# BBHASH-NEXT: - Name: .llvm_bb_addr_map +# BBHASH-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# BBHASH-NEXT: Entries: +# BBHASH-NEXT: - Version: 4 +# BBHASH-NEXT: Feature: 0x40 +# BBHASH-NEXT: BBRanges: +# BBHASH-NEXT: - BBEntries: +# BBHASH-NEXT: - ID: 0 +# BBHASH-NEXT: AddressOffset: 0x1 +# BBHASH-NEXT: Size: 0x2 +# BBHASH-NEXT: Metadata: 0x3 +# BBHASH-NEXT: Hash: 0x1 +# BBHASH-NEXT: - ID: 2 +# BBHASH-NEXT: AddressOffset: 0x4 +# BBHASH-NEXT: Size: 0x5 +# BBHASH-NEXT: Metadata: 0x6 +# BBHASH-NEXT: Hash: 0x2 +# BBHASH-NEXT: - ID: 4 +# BBHASH-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7 +# BBHASH-NEXT: Size: 0xFFFFFFFFFFFFFFF8 +# BBHASH-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9 +# BBHASH-NEXT: Hash: 0x3 +# BBHASH-NEXT: - Version: 4 +# BBHASH-NEXT: Feature: 0x68 +# BBHASH-NEXT: BBRanges: +# BBHASH-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20 +# BBHASH-NEXT: BBEntries: +# BBHASH-NEXT: - ID: 6 +# BBHASH-NEXT: AddressOffset: 0xA +# BBHASH-NEXT: Size: 0xB +# BBHASH-NEXT: Metadata: 0xC +# BBHASH-NEXT: CallsiteEndOffsets: [ 0x1, 0x2 ] +# BBHASH-NEXT: Hash: 0x123 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 4 + Feature: 0x40 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 0 + AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + Hash: 0x1 + - ID: 2 + AddressOffset: 0x4 + Size: 0x5 + Metadata: 0x6 + Hash: 0x2 + - ID: 4 + AddressOffset: 0xFFFFFFFFFFFFFFF7 + Size: 0xFFFFFFFFFFFFFFF8 + Metadata: 0xFFFFFFFFFFFFFFF9 + Hash: 0x3 + - Version: 4 + Feature: 0x68 + BBRanges: + - BaseAddress: 0xFFFFFFFFFFFFFF20 + BBEntries: + - ID: 6 + AddressOffset: 0xA + Size: 0xB + Metadata: 0xC + CallsiteEndOffsets: [ 0x1, 0x2 ] + Hash: 0x123 + ## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section ## when it can't extract the entries, for example, when the section is truncated, or ## when an invalid 'NumBlocks' or 'NumBBRanges` field is specified. diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 418f90f..339e419 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -72,6 +72,13 @@ # CHECK-NEXT: 0000: 03202000 00000000 0000010E 01000203 # CHECK-NEXT: ) +# Case 10: Specify basic block hash. +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 04602000 00000000 0000010E 01000203 +# CHECK-NEXT: 0010: 23010000 00000000 +# CHECK-NEXT: ) + --- !ELF FileHeader: @@ -176,6 +183,22 @@ Sections: Metadata: 0x00000003 CallsiteEndOffsets: [] +## 10) We can produce a SHT_LLVM_BB_ADDR_MAP section with basic block hash. + - Name: '.llvm_bb_addr_map (10)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 4 + Feature: 0x60 + BBRanges: + - BaseAddress: 0x0000000000000020 + BBEntries: + - ID: 14 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + CallsiteEndOffsets: [] + Hash: 0x123 + ## Check we can't use Entries at the same time as either Content or Size. 
# RUN: not yaml2obj --docnum=2 -DCONTENT="00" %s 2>&1 | FileCheck %s --check-prefix=INVALID # RUN: not yaml2obj --docnum=2 -DSIZE="0" %s 2>&1 | FileCheck %s --check-prefix=INVALID @@ -197,7 +220,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported versions. # RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION -# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 4; encoding using the most recent version +# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version --- !ELF FileHeader: @@ -209,4 +232,4 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: ## Specify unsupported version - - Version: 4 + - Version: 5 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ab93316..9c9b2dd 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -8155,6 +8155,8 @@ void LLVMELFDumper<ELFT>::printBBAddrMaps(bool PrettyPGOAnalysis) { W.printHex("Offset", BBE.Offset); if (!BBE.CallsiteEndOffsets.empty()) W.printList("Callsite End Offsets", BBE.CallsiteEndOffsets); + if (PAM.FeatEnable.BBHash) + W.printHex("Hash", BBE.Hash); W.printHex("Size", BBE.Size); W.printBoolean("HasReturn", BBE.hasReturn()); W.printBoolean("HasTailCall", BBE.hasTailCall()); diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index ef4552f..68e18f6 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -900,7 +900,7 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { while (Cur && Cur.tell() < Content.size()) { if (Shdr->sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { Version = Data.getU8(Cur); - if (Cur && Version > 3) + if (Cur && Version > 4) return createStringError( errc::invalid_argument, "invalid SHT_LLVM_BB_ADDR_MAP section version: " + @@ -946,8 +946,11 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { } uint64_t Size = Data.getULEB128(Cur); uint64_t Metadata = Data.getULEB128(Cur); + std::optional<llvm::yaml::Hex64> Hash; + if (FeatureOrErr->BBHash) + Hash = Data.getU64(Cur); BBEntries.push_back( - {ID, Offset, Size, Metadata, std::move(CallsiteEndOffsets)}); + {ID, Offset, Size, Metadata, std::move(CallsiteEndOffsets), Hash}); } TotalNumBlocks += BBEntries.size(); BBRanges.push_back({BaseAddress, /*NumBlocks=*/{}, BBEntries}); diff --git a/llvm/unittests/ADT/TypeTraitsTest.cpp b/llvm/unittests/ADT/TypeTraitsTest.cpp index a56aa7e..f9b8d6d 100644 --- a/llvm/unittests/ADT/TypeTraitsTest.cpp +++ b/llvm/unittests/ADT/TypeTraitsTest.cpp @@ -40,9 +40,7 @@ struct Foo { struct CheckMethodPointer : CheckFunctionTraits<decltype(&Foo::func)> {}; /// Test lambda references. -LLVM_ATTRIBUTE_UNUSED auto lambdaFunc = [](const int &v) -> bool { - return true; -}; +[[maybe_unused]] auto lambdaFunc = [](const int &v) -> bool { return true; }; struct CheckLambda : CheckFunctionTraits<decltype(lambdaFunc)> {}; } // end anonymous namespace diff --git a/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp index ae9db14..9a37980 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp @@ -97,10 +97,16 @@ TEST(ExecutorAddrTest, AddrRanges) { EXPECT_FALSE(R1.contains(A0)); EXPECT_FALSE(R1.contains(A2)); + EXPECT_TRUE(R3.contains(R0)); // True for singleton range at start. 
+  EXPECT_TRUE(R3.contains(R1)); // True for singleton range at end.
+  EXPECT_FALSE(R3.contains(R2)); // False for non-overlapping singleton range.
+  EXPECT_FALSE(R3.contains(R4)); // False for overlapping, uncontained range.
+
   EXPECT_FALSE(R1.overlaps(R0));
   EXPECT_FALSE(R1.overlaps(R2));
   EXPECT_TRUE(R1.overlaps(R3));
   EXPECT_TRUE(R1.overlaps(R4));
+  EXPECT_TRUE(R3.overlaps(R4));
 
   EXPECT_LE(R0, R0);
   EXPECT_LT(R0, R1);
diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp
index 54c7ddd..6376165 100644
--- a/llvm/unittests/IR/ConstantsTest.cpp
+++ b/llvm/unittests/IR/ConstantsTest.cpp
@@ -564,13 +564,17 @@ TEST(ConstantsTest, FoldGlobalVariablePtr) {
 
   Global->setAlignment(Align(4));
 
-  ConstantInt *TheConstant(ConstantInt::get(IntType, 2));
+  ConstantInt *TheConstant = ConstantInt::get(IntType, 2);
 
-  Constant *TheConstantExpr(ConstantExpr::getPtrToInt(Global.get(), IntType));
+  Constant *PtrToInt = ConstantExpr::getPtrToInt(Global.get(), IntType);
+  ASSERT_TRUE(
+      ConstantFoldBinaryInstruction(Instruction::And, PtrToInt, TheConstant)
+          ->isNullValue());
 
-  ASSERT_TRUE(ConstantFoldBinaryInstruction(Instruction::And, TheConstantExpr,
-                                            TheConstant)
-                  ->isNullValue());
+  Constant *PtrToAddr = ConstantExpr::getPtrToAddr(Global.get(), IntType);
+  ASSERT_TRUE(
+      ConstantFoldBinaryInstruction(Instruction::And, PtrToAddr, TheConstant)
+          ->isNullValue());
 }
 
 // Check that containsUndefOrPoisonElement and containsPoisonElement is working
diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp
index 17d9f50..d6a3ca5 100644
--- a/llvm/unittests/Object/ELFObjectFileTest.cpp
+++ b/llvm/unittests/Object/ELFObjectFileTest.cpp
@@ -531,7 +531,7 @@ Sections:
   // Check that we can detect unsupported versions.
   SmallString<128> UnsupportedVersionYamlString(CommonYamlString);
   UnsupportedVersionYamlString += R"(
-    - Version: 4
+    - Version: 5
       BBRanges:
         - BaseAddress: 0x11111
           BBEntries:
@@ -543,7 +543,7 @@ Sections:
   {
     SCOPED_TRACE("unsupported version");
     DoCheck(UnsupportedVersionYamlString,
-            "unsupported SHT_LLVM_BB_ADDR_MAP version: 4");
+            "unsupported SHT_LLVM_BB_ADDR_MAP version: 5");
   }
 
   SmallString<128> ZeroBBRangesYamlString(CommonYamlString);
@@ -761,14 +761,14 @@ Sections:
 
   BBAddrMap E1 = {
       {{0x11111,
-        {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}}}}}};
+        {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}, 0}}}}};
   BBAddrMap E2 = {
-      {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}},
-       {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}}}}}};
+      {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}},
+       {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}, 0}}}}};
   BBAddrMap E3 = {
-      {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}}}}}};
+      {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}}}}};
   BBAddrMap E4 = {
-      {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}}}}}};
+      {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}}}}};
 
   std::vector<BBAddrMap> Section0BBAddrMaps = {E4};
   std::vector<BBAddrMap> Section1BBAddrMaps = {E3};
@@ -988,6 +988,123 @@ Sections:
   }
 }
 
+// Test for the ELFObjectFile::readBBAddrMap API with BBHash.
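+// The YAML below enables the new hash feature bit (0x40, optionally combined
+// with 0x20 for callsite end offsets or 0x8 for multiple ranges), so every
+// BBEntry carries a trailing Hash value that readBBAddrMap is expected to
+// round-trip.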
+TEST(ELFObjectFileTest, ReadBBHash) {
+  StringRef CommonYamlString(R"(
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+Sections:
+  - Name: .llvm_bb_addr_map_1
+    Type: SHT_LLVM_BB_ADDR_MAP
+    Link: 1
+    Entries:
+      - Version: 4
+        Feature: 0x60
+        BBRanges:
+          - BaseAddress: 0x11111
+            BBEntries:
+              - ID: 1
+                AddressOffset: 0x0
+                Size: 0x1
+                Metadata: 0x2
+                CallsiteEndOffsets: [ 0x1 , 0x1 ]
+                Hash: 0x1
+  - Name: .llvm_bb_addr_map_2
+    Type: SHT_LLVM_BB_ADDR_MAP
+    Link: 1
+    Entries:
+      - Version: 4
+        Feature: 0x48
+        BBRanges:
+          - BaseAddress: 0x22222
+            BBEntries:
+              - ID: 2
+                AddressOffset: 0x0
+                Size: 0x2
+                Metadata: 0x4
+                Hash: 0x2
+          - BaseAddress: 0xFFFFF
+            BBEntries:
+              - ID: 15
+                AddressOffset: 0xF0
+                Size: 0xF1
+                Metadata: 0x1F
+                Hash: 0xF
+  - Name: .llvm_bb_addr_map_3
+    Type: SHT_LLVM_BB_ADDR_MAP
+    Link: 2
+    Entries:
+      - Version: 4
+        Feature: 0x40
+        BBRanges:
+          - BaseAddress: 0x33333
+            BBEntries:
+              - ID: 0
+                AddressOffset: 0x0
+                Size: 0x3
+                Metadata: 0x6
+                Hash: 0x3
+  - Name: .llvm_bb_addr_map_4
+    Type: SHT_LLVM_BB_ADDR_MAP
+    # Link: 0 (by default, can be overridden)
+    Entries:
+      - Version: 4
+        Feature: 0x40
+        BBRanges:
+          - BaseAddress: 0x44444
+            BBEntries:
+              - ID: 0
+                AddressOffset: 0x0
+                Size: 0x4
+                Metadata: 0x18
+                Hash: 0x4
+)");
+
+  BBAddrMap E1 = {
+      {{0x11111,
+        {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}, 0x1}}}}};
+  BBAddrMap E2 = {
+      {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0x2}}},
+       {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}, 0xF}}}}};
+  BBAddrMap E3 = {
+      {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0x3}}}}};
+  BBAddrMap E4 = {
+      {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0x4}}}}};
+
+  std::vector<BBAddrMap> Section0BBAddrMaps = {E4};
+  std::vector<BBAddrMap> Section1BBAddrMaps = {E3};
+  std::vector<BBAddrMap> Section2BBAddrMaps = {E1, E2};
+  std::vector<BBAddrMap> AllBBAddrMaps = {E1, E2, E3, E4};
+
+  auto DoCheckSucceeds = [&](StringRef YamlString,
+                             std::optional<unsigned> TextSectionIndex,
+                             std::vector<BBAddrMap> ExpectedResult) {
+    SCOPED_TRACE("for TextSectionIndex: " +
+                 (TextSectionIndex ? llvm::Twine(*TextSectionIndex) : "{}") +
+                 " and object yaml:\n" + YamlString);
+    SmallString<0> Storage;
+    Expected<ELFObjectFile<ELF64LE>> ElfOrErr =
+        toBinary<ELF64LE>(Storage, YamlString);
+    ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded());
+
+    Expected<const typename ELF64LE::Shdr *> BBAddrMapSecOrErr =
+        ElfOrErr->getELFFile().getSection(1);
+    ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded());
+    auto BBAddrMaps = ElfOrErr->readBBAddrMap(TextSectionIndex);
+    ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded());
+    EXPECT_EQ(*BBAddrMaps, ExpectedResult);
+  };
+
+  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt,
+                  AllBBAddrMaps);
+  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps);
+  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps);
+  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps);
+}
+
 // Test for the ELFObjectFile::readBBAddrMap API with PGOAnalysisMap.
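+// Note: BBAddrMap::Features now carries a seventh flag (BBHash) and BBEntry a
+// trailing hash; the PGO expectations below leave the flag false and the hash
+// zero, which is why the literals here gained an extra field.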
TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { StringRef CommonYamlString(R"( @@ -1159,29 +1276,32 @@ Sections: )"); BBAddrMap E1 = { - {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}}}}}}; - PGOAnalysisMap P1 = {892, {}, {true, false, false, false, false, false}}; + {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}, 0}}}}}; + PGOAnalysisMap P1 = { + 892, {}, {true, false, false, false, false, false, false}}; BBAddrMap E2 = { - {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}}}; + {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; PGOAnalysisMap P2 = {{}, {{BlockFrequency(343), {}}}, - {false, true, false, false, false, false}}; - BBAddrMap E3 = {{{0x33333, - {{0, 0x0, 0x3, {false, true, true, false, false}, {}}, - {1, 0x3, 0x3, {false, false, true, false, false}, {}}, - {2, 0x6, 0x3, {false, false, false, false, false}, {}}}}}}; + {false, true, false, false, false, false, false}}; + BBAddrMap E3 = { + {{0x33333, + {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}, + {1, 0x3, 0x3, {false, false, true, false, false}, {}, 0}, + {2, 0x6, 0x3, {false, false, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P3 = {{}, {{{}, {{1, BranchProbability::getRaw(0x1111'1111)}, {2, BranchProbability::getRaw(0xeeee'eeee)}}}, {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, {{}, {}}}, - {false, false, true, false, false, false}}; - BBAddrMap E4 = {{{0x44444, - {{0, 0x0, 0x4, {false, false, false, true, true}, {}}, - {1, 0x4, 0x4, {false, false, false, false, false}, {}}, - {2, 0x8, 0x4, {false, false, false, false, false}, {}}, - {3, 0xc, 0x4, {false, false, false, false, false}, {}}}}}}; + {false, false, true, false, false, false, false}}; + BBAddrMap E4 = { + {{0x44444, + {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}, + {1, 0x4, 0x4, {false, false, false, false, false}, {}, 0}, + {2, 0x8, 0x4, {false, false, false, false, false}, {}, 0}, + {3, 0xc, 0x4, {false, false, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P4 = { 1000, {{BlockFrequency(1000), @@ -1193,22 +1313,24 @@ Sections: {3, BranchProbability::getRaw(0xeeee'eeee)}}}, {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, {BlockFrequency(1000), {}}}, - {true, true, true, false, false, false}}; + {true, true, true, false, false, false, false}}; BBAddrMap E5 = { - {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}}}; - PGOAnalysisMap P5 = {{}, {}, {false, false, false, false, false, false}}; + {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; + PGOAnalysisMap P5 = { + {}, {}, {false, false, false, false, false, false, false}}; BBAddrMap E6 = { {{0x66666, - {{0, 0x0, 0x6, {false, true, true, false, false}, {}}, - {1, 0x6, 0x6, {false, false, true, false, false}, {}}}}, - {0x666661, {{2, 0x0, 0x6, {false, false, false, false, false}, {}}}}}}; + {{0, 0x0, 0x6, {false, true, true, false, false}, {}, 0}, + {1, 0x6, 0x6, {false, false, true, false, false}, {}, 0}}}, + {0x666661, + {{2, 0x0, 0x6, {false, false, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P6 = {{}, {{{}, {{1, BranchProbability::getRaw(0x2222'2222)}, {2, BranchProbability::getRaw(0xcccc'cccc)}}}, {{}, {{2, BranchProbability::getRaw(0x8888'8888)}}}, {{}, {}}}, - {false, false, true, true, false, false}}; + {false, false, true, true, false, false, false}}; std::vector<BBAddrMap> Section0BBAddrMaps = {E4, E5, E6}; std::vector<BBAddrMap> Section1BBAddrMaps = {E3}; diff --git a/llvm/unittests/Object/ELFTypesTest.cpp 
b/llvm/unittests/Object/ELFTypesTest.cpp index f88931b5f..1765e15 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -101,21 +101,22 @@ static_assert( "PGOAnalysisMap should use the same type for basic block ID as BBAddrMap"); TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { - const std::array<BBAddrMap::Features, 11> Decoded = { - {{false, false, false, false, false, false}, - {true, false, false, false, false, false}, - {false, true, false, false, false, false}, - {false, false, true, false, false, false}, - {false, false, false, true, false, false}, - {true, true, false, false, false, false}, - {false, true, true, false, false, false}, - {false, true, true, true, false, false}, - {true, true, true, true, false, false}, - {false, false, false, false, true, false}, - {false, false, false, false, false, true}}}; - const std::array<uint8_t, 11> Encoded = {{0b0000, 0b0001, 0b0010, 0b0100, - 0b1000, 0b0011, 0b0110, 0b1110, - 0b1111, 0b1'0000, 0b10'0000}}; + const std::array<BBAddrMap::Features, 12> Decoded = { + {{false, false, false, false, false, false, false}, + {true, false, false, false, false, false, false}, + {false, true, false, false, false, false, false}, + {false, false, true, false, false, false, false}, + {false, false, false, true, false, false, false}, + {true, true, false, false, false, false, false}, + {false, true, true, false, false, false, false}, + {false, true, true, true, false, false, false}, + {true, true, true, true, false, false, false}, + {false, false, false, false, true, false, false}, + {false, false, false, false, false, true, false}, + {false, false, false, false, false, false, true}}}; + const std::array<uint8_t, 12> Encoded = { + {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111, + 0b1'0000, 0b10'0000, 0b100'0000}}; for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) EXPECT_EQ(Feat.encode(), EncodedVal); for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) { @@ -128,9 +129,9 @@ TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) { const std::array<std::string, 2> Errors = { - "invalid encoding for BBAddrMap::Features: 0x40", + "invalid encoding for BBAddrMap::Features: 0x80", "invalid encoding for BBAddrMap::Features: 0xf0"}; - const std::array<uint8_t, 2> Values = {{0b100'0000, 0b1111'0000}}; + const std::array<uint8_t, 2> Values = {{0b1000'0000, 0b1111'0000}}; for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(), FailedWithMessage(Error)); diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp index 5ac4c53..809960d 100644 --- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp @@ -228,6 +228,114 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_SUB) { EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u); } +TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_ADD) { + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto Nneg1 = DAG->getConstant(0xFF, Loc, IntVT); + auto N0 = DAG->getConstant(0x00, Loc, IntVT); + auto N1 = DAG->getConstant(0x01, Loc, IntVT); + auto N5 = DAG->getConstant(0x05, Loc, IntVT); + auto N8 = DAG->getConstant(0x08, Loc, IntVT); + auto Nsign1 = DAG->getConstant(0x55, Loc, IntVT); + auto UnknownOp = DAG->getRegister(0, IntVT); + auto Mask = 
DAG->getConstant(0x1e, Loc, IntVT); + auto Nsign3 = DAG->getNode(ISD::AND, Loc, IntVT, Mask, UnknownOp); + // RHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpRhsEo = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, Nsign1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpRhsEo), 1u); + + // ADD 0 -1 + // N0 = 00000000 + // Nneg1 = 11111111 + auto OpNegZero = DAG->getNode(ISD::ADD, Loc, IntVT, N0, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegZero), 8u); + + // ADD 1 -1 + // N1 = 00000001 + // Nneg1 = 11111111 + auto OpNegOne = DAG->getNode(ISD::ADD, Loc, IntVT, N1, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegOne), 8u); + + // ADD 8 -1 + // N8 = 00001000 + // Nneg1 = 11111111 + auto OpSeven = DAG->getNode(ISD::ADD, Loc, IntVT, N8, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpSeven), 5u); + + // Non negative + // Nsign3 = 000????0 + // Nneg1 = 11111111 + auto OpNonNeg = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNonNeg), 3u); + + // LHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpLhsEo = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign1, Nsign3); + EXPECT_EQ(DAG->ComputeNumSignBits(OpLhsEo), 1u); + + // Nsign3 = 000????0 + // N5 = 00000101 + auto Op = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, N5); + EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u); +} + +TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_ADDC) { + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto Nneg1 = DAG->getConstant(0xFF, Loc, IntVT); + auto N0 = DAG->getConstant(0x00, Loc, IntVT); + auto N1 = DAG->getConstant(0x01, Loc, IntVT); + auto N5 = DAG->getConstant(0x05, Loc, IntVT); + auto N8 = DAG->getConstant(0x08, Loc, IntVT); + auto Nsign1 = DAG->getConstant(0x55, Loc, IntVT); + auto UnknownOp = DAG->getRegister(0, IntVT); + auto Mask = DAG->getConstant(0x1e, Loc, IntVT); + auto Nsign3 = DAG->getNode(ISD::AND, Loc, IntVT, Mask, UnknownOp); + // RHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpRhsEo = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, Nsign1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpRhsEo), 1u); + + // ADD 0 -1 + // N0 = 00000000 + // Nneg1 = 11111111 + auto OpNegZero = DAG->getNode(ISD::ADDC, Loc, IntVT, N0, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegZero), 8u); + + // ADD 1 -1 + // N1 = 00000001 + // Nneg1 = 11111111 + auto OpNegOne = DAG->getNode(ISD::ADDC, Loc, IntVT, N1, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegOne), 8u); + + // ADD 8 -1 + // N8 = 00001000 + // Nneg1 = 11111111 + auto OpSeven = DAG->getNode(ISD::ADDC, Loc, IntVT, N8, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpSeven), 4u); + + // Non negative + // Nsign3 = 000????0 + // Nneg1 = 11111111 + auto OpNonNeg = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNonNeg), 3u); + + // LHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpLhsEo = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign1, Nsign3); + EXPECT_EQ(DAG->ComputeNumSignBits(OpLhsEo), 1u); + + // Nsign3 = 000????0 + // N5 = 00000101 + auto Op = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, N5); + EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u); +} + TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) { TargetLowering TL(*TM); diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 343c2bb71..a5c5426 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -906,7 +906,6 @@ Transforms/InstCombine/select_frexp.ll Transforms/InstCombine/select.ll 
 Transforms/InstCombine/select-min-max.ll
 Transforms/InstCombine/select-of-symmetric-selects.ll
-Transforms/InstCombine/select-safe-impliedcond-transforms.ll
 Transforms/InstCombine/select-safe-transforms.ll
 Transforms/InstCombine/select-select.ll
 Transforms/InstCombine/select-with-extreme-eq-cond.ll
@@ -1237,7 +1236,6 @@ Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
 Transforms/PGOProfile/chr-dead-pred.ll
 Transforms/PGOProfile/chr-dup-threshold.ll
 Transforms/PGOProfile/chr-lifetimes.ll
-Transforms/PGOProfile/chr.ll
 Transforms/PGOProfile/chr-poison.ll
 Transforms/PGOProfile/comdat.ll
 Transforms/PGOProfile/memop_profile_funclet_wasm.ll
@@ -1312,14 +1310,11 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SROA/addrspacecast.ll
-Transforms/SROA/phi-and-select.ll
 Transforms/SROA/phi-gep.ll
 Transforms/SROA/scalable-vectors-with-known-vscale.ll
 Transforms/SROA/select-gep.ll
 Transforms/SROA/select-load.ll
 Transforms/SROA/slice-width.ll
-Transforms/SROA/std-clamp.ll
 Transforms/SROA/vector-conversion.ll
 Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
 Transforms/SROA/vector-promotion.ll
diff --git a/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h
new file mode 100644
index 0000000..72ac247
--- /dev/null
+++ b/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h
@@ -0,0 +1,54 @@
+//===- StridedMetadataRangeAnalysis.h - Strided metadata ranges -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
+#define MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
+
+#include "mlir/Analysis/DataFlow/SparseAnalysis.h"
+#include "mlir/Interfaces/InferStridedMetadataInterface.h"
+
+namespace mlir {
+namespace dataflow {
+
+/// This lattice element represents the strided metadata of an SSA value.
+class StridedMetadataRangeLattice : public Lattice<StridedMetadataRange> {
+public:
+  using Lattice::Lattice;
+};
+
+/// Strided metadata range analysis determines the strided metadata ranges of
+/// SSA values using operations that define `InferStridedMetadataInterface`.
+///
+/// This analysis depends on DeadCodeAnalysis, SparseConstantPropagation, and
+/// IntegerRangeAnalysis, and will be a silent no-op if the analyses are not
+/// loaded in the same solver context.
+class StridedMetadataRangeAnalysis
+    : public SparseForwardDataFlowAnalysis<StridedMetadataRangeLattice> {
+public:
+  StridedMetadataRangeAnalysis(DataFlowSolver &solver,
+                               int32_t indexBitwidth = 64);
+
+  /// At an entry point, we cannot reason about strided metadata ranges unless
+  /// the type also encodes the data. For example, a memref with static layout.
+  void setToEntryState(StridedMetadataRangeLattice *lattice) override;
+
+  /// Visit an operation. Invoke the transfer function on each operation that
+  /// implements `InferStridedMetadataInterface`.
+  LogicalResult
+  visitOperation(Operation *op,
+                 ArrayRef<const StridedMetadataRangeLattice *> operands,
+                 ArrayRef<StridedMetadataRangeLattice *> results) override;
+
+private:
+  /// Index bitwidth to use when operating with the int-ranges.
+  int32_t indexBitwidth = 64;
+};
+} // namespace dataflow
+} // end namespace mlir
+
+#endif // MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
diff --git a/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h b/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h
new file mode 100644
index 0000000..91d3c92
--- /dev/null
+++ b/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h
@@ -0,0 +1,27 @@
+//===- MathToXeVM.h - Utils for converting Math to XeVM -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_CONVERSION_MATHTOXEVM_MATHTOXEVM_H_
+#define MLIR_CONVERSION_MATHTOXEVM_MATHTOXEVM_H_
+
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
+#include "mlir/IR/PatternMatch.h"
+#include <memory>
+
+namespace mlir {
+class Pass;
+
+#define GEN_PASS_DECL_CONVERTMATHTOXEVM
+#include "mlir/Conversion/Passes.h.inc"
+
+/// Populate the given list with patterns that convert from Math to XeVM calls.
+void populateMathToXeVMConversionPatterns(RewritePatternSet &patterns,
+                                          bool convertArith);
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_MATHTOXEVM_MATHTOXEVM_H_
diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
index da061b2..40d866e 100644
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -49,6 +49,7 @@
 #include "mlir/Conversion/MathToLibm/MathToLibm.h"
 #include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
 #include "mlir/Conversion/MathToSPIRV/MathToSPIRVPass.h"
+#include "mlir/Conversion/MathToXeVM/MathToXeVM.h"
 #include "mlir/Conversion/MemRefToEmitC/MemRefToEmitCPass.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/MemRefToSPIRV/MemRefToSPIRVPass.h"
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 3c18ecc..25e9d34 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -797,6 +797,31 @@ def ConvertMathToSPIRVPass : Pass<"convert-math-to-spirv"> {
 }
 
 //===----------------------------------------------------------------------===//
+// MathToXeVM
+//===----------------------------------------------------------------------===//
+
+def ConvertMathToXeVM : Pass<"convert-math-to-xevm", "ModuleOp"> {
+  let summary =
+      "Convert (fast) math operations to native XeVM/SPIRV equivalents";
+  let description = [{
+    This pass converts supported math ops marked with the `afn` fastmath flag
+    to function calls for OpenCL `native_` math intrinsics: these intrinsics
+    are typically mapped directly to native device instructions, often
+    resulting in better performance. However, the precision/error of these
+    intrinsics is implementation-defined, so math ops are only converted when
+    they carry the `afn` fastmath flag.
+  }];
+  let options = [Option<
+      "convertArith", "convert-arith", "bool", /*default=*/"true",
+      "Convert supported Arith ops (e.g. 
arith.divf) as well.">]; + let dependentDialects = [ + "arith::ArithDialect", + "xevm::XeVMDialect", + "LLVM::LLVMDialect", + ]; +} + +//===----------------------------------------------------------------------===// // MathToEmitC //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h index 30f33ed..69447f7 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h @@ -17,6 +17,7 @@ #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferIntRangeInterface.h" +#include "mlir/Interfaces/InferStridedMetadataInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/MemOpInterfaces.h" #include "mlir/Interfaces/MemorySlotInterfaces.h" diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 89bd0f1..b39207f 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -14,6 +14,7 @@ include "mlir/Dialect/MemRef/IR/MemRefBase.td" include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferIntRangeInterface.td" +include "mlir/Interfaces/InferStridedMetadataInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/MemOpInterfaces.td" include "mlir/Interfaces/MemorySlotInterfaces.td" @@ -2085,6 +2086,7 @@ def MemRef_StoreOp : MemRef_Op<"store", def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>, + DeclareOpInterfaceMethods<InferStridedMetadataOpInterface>, DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, DeclareOpInterfaceMethods<ViewLikeOpInterface>, AttrSizedOperandSegments, diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 77e833f..fecf81b 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1316,6 +1316,24 @@ def OpenACC_PrivateRecipeOp }]; let hasRegionVerifier = 1; + + let extraClassDeclaration = [{ + /// Creates a PrivateRecipeOp and populates its regions based on the + /// variable type as long as the type implements MappableType or + /// PointerLikeType interface. If a type implements both, the MappableType + /// API will be preferred. Returns std::nullopt if the recipe cannot be + /// created or populated. The builder's current insertion point will be used + /// and it must be a valid place for this operation to be inserted. The + /// `recipeName` must be a unique name to prevent "redefinition of symbol" + /// IR errors. + static std::optional<PrivateRecipeOp> createAndPopulate( + ::mlir::OpBuilder &builder, + ::mlir::Location loc, + ::llvm::StringRef recipeName, + ::mlir::Type varType, + ::llvm::StringRef varName = "", + ::mlir::ValueRange bounds = {}); + }]; } //===----------------------------------------------------------------------===// @@ -1410,6 +1428,24 @@ def OpenACC_FirstprivateRecipeOp }]; let hasRegionVerifier = 1; + + let extraClassDeclaration = [{ + /// Creates a FirstprivateRecipeOp and populates its regions based on the + /// variable type as long as the type implements MappableType or + /// PointerLikeType interface. 
If a type implements both, the MappableType + /// API will be preferred. Returns std::nullopt if the recipe cannot be + /// created or populated. The builder's current insertion point will be used + /// and it must be a valid place for this operation to be inserted. The + /// `recipeName` must be a unique name to prevent "redefinition of symbol" + /// IR errors. + static std::optional<FirstprivateRecipeOp> createAndPopulate( + ::mlir::OpBuilder &builder, + ::mlir::Location loc, + ::llvm::StringRef recipeName, + ::mlir::Type varType, + ::llvm::StringRef varName = "", + ::mlir::ValueRange bounds = {}); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index 0d16255..6736bc8 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -83,7 +83,15 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { The `originalVar` parameter is optional but enables support for dynamic types (e.g., dynamic memrefs). When provided, implementations can extract runtime dimension information from the original variable to create - allocations with matching dynamic sizes. + allocations with matching dynamic sizes. When generating recipe bodies, + `originalVar` should be the block argument representing the original + variable in the recipe region. + + The `needsFree` output parameter indicates whether the allocated memory + requires explicit deallocation. Implementations should set this to true + for heap allocations that need a matching deallocation operation (e.g., + alloc) and false for stack-based allocations (e.g., alloca). During + recipe generation, this determines whether a destroy region is created. Returns a Value representing the result of the allocation. If no value is returned, it means the allocation was not successfully generated. @@ -94,7 +102,8 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { "::mlir::Location":$loc, "::llvm::StringRef":$varName, "::mlir::Type":$varType, - "::mlir::Value":$originalVar), + "::mlir::Value":$originalVar, + "bool &":$needsFree), /*methodBody=*/"", /*defaultImplementation=*/[{ return {}; @@ -102,23 +111,34 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { >, InterfaceMethod< /*description=*/[{ - Generates deallocation operations for the pointer-like type. It deallocates - the instance provided. + Generates deallocation operations for the pointer-like type. - The `varPtr` parameter is required and must represent an instance that was - previously allocated. If the current type is represented in a way that it - does not capture the pointee type, `varType` must be passed in to provide - the necessary type information. Nothing is generated in case the allocate - is `alloca`-like. + The `varToFree` parameter is required and must represent an instance + that was previously allocated. When generating recipe bodies, this + should be the block argument representing the private variable in the + destroy region. + + The `allocRes` parameter is optional and provides the result of the + corresponding allocation from the init region. This allows implementations + to inspect the allocation operation to determine the appropriate + deallocation strategy. This is necessary because in recipe generation, + the allocation and deallocation occur in separate regions. 
Dialects that + use only one allocation type or can determine deallocation from type + information alone may ignore this parameter. - Returns true if deallocation was successfully generated or successfully - deemed as not needed to be generated, false otherwise. + The `varType` parameter must be provided if the current type does not + capture the pointee type information. No deallocation is generated for + stack-based allocations (e.g., alloca). + + Returns true if deallocation was successfully generated or determined to + be unnecessary, false otherwise. }], /*retTy=*/"bool", /*methodName=*/"genFree", /*args=*/(ins "::mlir::OpBuilder &":$builder, "::mlir::Location":$loc, - "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varToFree, + "::mlir::Value":$allocRes, "::mlir::Type":$varType), /*methodBody=*/"", /*defaultImplementation=*/[{ @@ -274,6 +294,14 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { The `initVal` can be empty - it is primarily needed for reductions to ensure the variable is also initialized with appropriate value. + The `needsDestroy` out-parameter is set by implementations to indicate + that destruction code must be generated after the returned private + variable usages, typically in the destroy region of recipe operations + (for example, when heap allocations or temporaries requiring cleanup + are created during initialization). When `needsDestroy` is set, callers + should invoke `generatePrivateDestroy` in the recipe's destroy region + with the privatized value returned by this method. + If the return value is empty, it means that recipe body was not successfully generated. }], @@ -284,12 +312,38 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { "::mlir::TypedValue<::mlir::acc::MappableType>":$var, "::llvm::StringRef":$varName, "::mlir::ValueRange":$extents, - "::mlir::Value":$initVal), + "::mlir::Value":$initVal, + "bool &":$needsDestroy), /*methodBody=*/"", /*defaultImplementation=*/[{ return {}; }] >, + InterfaceMethod< + /*description=*/[{ + Generates destruction operations for a privatized value previously + produced by `generatePrivateInit`. This is typically inserted in a + recipe's destroy region, after all uses of the privatized value. + + The `privatized` value is the SSA value yielded by the init region + (and passed as the privatized argument to the destroy region). + Implementations should free heap-allocated storage or perform any + cleanup required for the given type. If no destruction is required, + this function should be a no-op and return `true`. + + Returns true if destruction was successfully generated or deemed not + necessary, false otherwise. 
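+
+      A sketch of the expected pairing (the names and insertion points are
+      illustrative; real callers emit the destroy in the recipe's destroy
+      region):
+
+      ```c++
+      bool needsDestroy = false;
+      mlir::Value privatized = mappableTy.generatePrivateInit(
+          builder, loc, var, varName, extents, initVal, needsDestroy);
+      if (privatized && needsDestroy)
+        // destroyBuilder: a builder positioned in the destroy region (assumed).
+        mappableTy.generatePrivateDestroy(destroyBuilder, loc, privatized);
+      ```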
+ }], + /*retTy=*/"bool", + /*methodName=*/"generatePrivateDestroy", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::Value":$privatized), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return true; + }] + >, ]; } diff --git a/mlir/include/mlir/IR/Remarks.h b/mlir/include/mlir/IR/Remarks.h index 20e84ec..9877926 100644 --- a/mlir/include/mlir/IR/Remarks.h +++ b/mlir/include/mlir/IR/Remarks.h @@ -18,7 +18,6 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Regex.h" -#include <optional> #include "mlir/IR/Diagnostics.h" #include "mlir/IR/MLIRContext.h" @@ -144,7 +143,7 @@ public: llvm::StringRef getCategoryName() const { return categoryName; } - llvm::StringRef getFullCategoryName() const { + llvm::StringRef getCombinedCategoryName() const { if (categoryName.empty() && subCategoryName.empty()) return {}; if (subCategoryName.empty()) @@ -318,7 +317,7 @@ private: }; //===----------------------------------------------------------------------===// -// MLIR Remark Streamer +// Pluggable Remark Utilities //===----------------------------------------------------------------------===// /// Base class for MLIR remark streamers that is used to stream @@ -338,6 +337,26 @@ public: virtual void finalize() {} // optional }; +using ReportFn = llvm::unique_function<void(const Remark &)>; + +/// Base class for MLIR remark emitting policies that is used to emit +/// optimization remarks to the underlying remark streamer. The derived classes +/// should implement the `reportRemark` method to provide the actual emitting +/// implementation. +class RemarkEmittingPolicyBase { +protected: + ReportFn reportImpl; + +public: + RemarkEmittingPolicyBase() = default; + virtual ~RemarkEmittingPolicyBase() = default; + + void initialize(ReportFn fn) { reportImpl = std::move(fn); } + + virtual void reportRemark(const Remark &remark) = 0; + virtual void finalize() = 0; +}; + //===----------------------------------------------------------------------===// // Remark Engine (MLIR Context will own this class) //===----------------------------------------------------------------------===// @@ -355,6 +374,8 @@ private: std::optional<llvm::Regex> failedFilter; /// The MLIR remark streamer that will be used to emit the remarks. std::unique_ptr<MLIRRemarkStreamerBase> remarkStreamer; + /// The MLIR remark policy that will be used to emit the remarks. + std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy; /// When is enabled, engine also prints remarks as mlir::emitRemarks. bool printAsEmitRemarks = false; @@ -392,6 +413,8 @@ private: InFlightRemark emitIfEnabled(Location loc, RemarkOpts opts, bool (RemarkEngine::*isEnabled)(StringRef) const); + /// Report a remark. + void reportImpl(const Remark &remark); public: /// Default constructor is deleted, use the other constructor. @@ -407,8 +430,15 @@ public: ~RemarkEngine(); /// Setup the remark engine with the given output path and format. - LogicalResult initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer, - std::string *errMsg); + LogicalResult + initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer, + std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy, + std::string *errMsg); + + /// Get the remark emitting policy. + RemarkEmittingPolicyBase *getRemarkEmittingPolicy() const { + return remarkEmittingPolicy.get(); + } /// Report a remark. 
   void report(const Remark &&remark);
@@ -446,6 +476,46 @@ inline InFlightRemark withEngine(Fn fn, Location loc, Args &&...args) {
 
 namespace mlir::remark {
 
+//===----------------------------------------------------------------------===//
+// Remark Emitting Policies
+//===----------------------------------------------------------------------===//
+
+/// Policy that emits all remarks.
+class RemarkEmittingPolicyAll : public detail::RemarkEmittingPolicyBase {
+public:
+  RemarkEmittingPolicyAll();
+
+  void reportRemark(const detail::Remark &remark) override {
+    assert(reportImpl && "reportImpl is not set");
+    reportImpl(remark);
+  }
+  void finalize() override {}
+};
+
+/// Policy that emits only the final version of each remark.
+class RemarkEmittingPolicyFinal : public detail::RemarkEmittingPolicyBase {
+private:
+  /// Remarks postponed until finalization. The user can intercept them for
+  /// custom processing via a registered callback; otherwise they are reported
+  /// on engine destruction.
+  llvm::DenseSet<detail::Remark> postponedRemarks;
+
+public:
+  RemarkEmittingPolicyFinal();
+
+  void reportRemark(const detail::Remark &remark) override {
+    postponedRemarks.erase(remark);
+    postponedRemarks.insert(remark);
+  }
+
+  void finalize() override {
+    assert(reportImpl && "reportImpl is not set");
+    for (auto &remark : postponedRemarks) {
+      if (reportImpl)
+        reportImpl(remark);
+    }
+  }
+};
+
 /// Create a Reason with llvm::formatv formatting.
 template <class... Ts>
 inline detail::LazyTextBuild reason(const char *fmt, Ts &&...ts) {
@@ -505,16 +575,72 @@ inline detail::InFlightRemark analysis(Location loc, RemarkOpts opts) {
 
 /// Setup remarks for the context. This function will enable the remark engine
 /// and set the streamer to be used for optimization remarks. The remark
-/// categories are used to filter the remarks that will be emitted by the remark
-/// engine. If a category is not specified, it will not be emitted. If
+/// categories are used to filter the remarks that will be emitted by the
+/// remark engine. If a category is not specified, it will not be emitted. If
 /// `printAsEmitRemarks` is true, the remarks will be printed as
 /// mlir::emitRemarks. 'streamer' must inherit from MLIRRemarkStreamerBase and
 /// will be used to stream the remarks.
 LogicalResult enableOptimizationRemarks(
     MLIRContext &ctx,
     std::unique_ptr<remark::detail::MLIRRemarkStreamerBase> streamer,
+    std::unique_ptr<remark::detail::RemarkEmittingPolicyBase>
+        remarkEmittingPolicy,
     const remark::RemarkCategories &cats, bool printAsEmitRemarks = false);
 
 } // namespace mlir::remark
 
+// DenseMapInfo specialization for Remark
+namespace llvm {
+template <>
+struct DenseMapInfo<mlir::remark::detail::Remark> {
+  static constexpr StringRef kEmptyKey = "<EMPTY_KEY>";
+  static constexpr StringRef kTombstoneKey = "<TOMBSTONE_KEY>";
+
+  /// Helper to provide a static dummy context for sentinel keys.
+ static mlir::MLIRContext *getStaticDummyContext() { + static mlir::MLIRContext dummyContext; + return &dummyContext; + } + + /// Create an empty remark + static inline mlir::remark::detail::Remark getEmptyKey() { + return mlir::remark::detail::Remark( + mlir::remark::RemarkKind::RemarkUnknown, mlir::DiagnosticSeverity::Note, + mlir::UnknownLoc::get(getStaticDummyContext()), + mlir::remark::RemarkOpts::name(kEmptyKey)); + } + + /// Create a dead remark + static inline mlir::remark::detail::Remark getTombstoneKey() { + return mlir::remark::detail::Remark( + mlir::remark::RemarkKind::RemarkUnknown, mlir::DiagnosticSeverity::Note, + mlir::UnknownLoc::get(getStaticDummyContext()), + mlir::remark::RemarkOpts::name(kTombstoneKey)); + } + + /// Compute the hash value of the remark + static unsigned getHashValue(const mlir::remark::detail::Remark &remark) { + return llvm::hash_combine( + remark.getLocation().getAsOpaquePointer(), + llvm::hash_value(remark.getRemarkName()), + llvm::hash_value(remark.getCombinedCategoryName())); + } + + static bool isEqual(const mlir::remark::detail::Remark &lhs, + const mlir::remark::detail::Remark &rhs) { + // Check for empty/tombstone keys first + if (lhs.getRemarkName() == kEmptyKey || + lhs.getRemarkName() == kTombstoneKey || + rhs.getRemarkName() == kEmptyKey || + rhs.getRemarkName() == kTombstoneKey) { + return lhs.getRemarkName() == rhs.getRemarkName(); + } + + // For regular remarks, compare key identifying fields + return lhs.getLocation() == rhs.getLocation() && + lhs.getRemarkName() == rhs.getRemarkName() && + lhs.getCombinedCategoryName() == rhs.getCombinedCategoryName(); + } +}; +} // namespace llvm #endif // MLIR_IR_REMARKS_H diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt index a5feb59..72ed046 100644 --- a/mlir/include/mlir/Interfaces/CMakeLists.txt +++ b/mlir/include/mlir/Interfaces/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_interface(DestinationStyleOpInterface) add_mlir_interface(FunctionInterfaces) add_mlir_interface(IndexingMapOpInterface) add_mlir_interface(InferIntRangeInterface) +add_mlir_interface(InferStridedMetadataInterface) add_mlir_interface(InferTypeOpInterface) add_mlir_interface(LoopLikeInterface) add_mlir_interface(MemOpInterfaces) diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h index 0e107e8..a6de3d1 100644 --- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h +++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h @@ -117,7 +117,8 @@ public: IntegerValueRange(ConstantIntRanges value) : value(std::move(value)) {} /// Create an integer value range lattice value. - IntegerValueRange(std::optional<ConstantIntRanges> value = std::nullopt) + explicit IntegerValueRange( + std::optional<ConstantIntRanges> value = std::nullopt) : value(std::move(value)) {} /// Whether the range is uninitialized. This happens when the state hasn't @@ -167,6 +168,15 @@ using SetIntRangeFn = using SetIntLatticeFn = llvm::function_ref<void(Value, const IntegerValueRange &)>; +/// Helper callback type to get the integer range of a value. +using GetIntRangeFn = function_ref<IntegerValueRange(Value)>; + +/// Helper function to collect the integer range values of an array of op fold +/// results. 
+SmallVector<IntegerValueRange> getIntValueRanges(ArrayRef<OpFoldResult> values, + GetIntRangeFn getIntRange, + int32_t indexBitwidth); + class InferIntRangeInterface; namespace intrange::detail { diff --git a/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h new file mode 100644 index 0000000..0c572e0 --- /dev/null +++ b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h @@ -0,0 +1,145 @@ +//===- InferStridedMetadataInterface.h - Strided Metadata Inference -C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions of the strided metadata inference interface +// defined in `InferStridedMetadataInterface.td` +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H +#define MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H + +#include "mlir/Interfaces/InferIntRangeInterface.h" + +namespace mlir { +/// A class that represents the strided metadata range information, including +/// offsets, sizes, and strides as integer ranges. +class StridedMetadataRange { +public: + /// Default constructor creates uninitialized ranges. + StridedMetadataRange() = default; + + /// Returns a ranked strided metadata range. + static StridedMetadataRange + getRanked(SmallVectorImpl<ConstantIntRanges> &&offsets, + SmallVectorImpl<ConstantIntRanges> &&sizes, + SmallVectorImpl<ConstantIntRanges> &&strides) { + return StridedMetadataRange(std::move(offsets), std::move(sizes), + std::move(strides)); + } + + /// Returns a strided metadata range with maximum ranges. + static StridedMetadataRange getMaxRanges(int32_t indexBitwidth, + int32_t offsetsRank, + int32_t sizeRank, + int32_t stridedRank) { + return StridedMetadataRange( + SmallVector<ConstantIntRanges>( + offsetsRank, ConstantIntRanges::maxRange(indexBitwidth)), + SmallVector<ConstantIntRanges>( + sizeRank, ConstantIntRanges::maxRange(indexBitwidth)), + SmallVector<ConstantIntRanges>( + stridedRank, ConstantIntRanges::maxRange(indexBitwidth))); + } + + static StridedMetadataRange getMaxRanges(int32_t indexBitwidth, + int32_t rank) { + return getMaxRanges(indexBitwidth, 1, rank, rank); + } + + /// Returns whether the metadata is uninitialized. + bool isUninitialized() const { return !offsets.has_value(); } + + /// Get the offsets range. + ArrayRef<ConstantIntRanges> getOffsets() const { + return offsets ? *offsets : ArrayRef<ConstantIntRanges>(); + } + MutableArrayRef<ConstantIntRanges> getOffsets() { + return offsets ? *offsets : MutableArrayRef<ConstantIntRanges>(); + } + + /// Get the sizes ranges. + ArrayRef<ConstantIntRanges> getSizes() const { return sizes; } + MutableArrayRef<ConstantIntRanges> getSizes() { return sizes; } + + /// Get the strides ranges. + ArrayRef<ConstantIntRanges> getStrides() const { return strides; } + MutableArrayRef<ConstantIntRanges> getStrides() { return strides; } + + /// Compare two strided metadata ranges. + bool operator==(const StridedMetadataRange &other) const { + return offsets == other.offsets && sizes == other.sizes && + strides == other.strides; + } + + /// Print the strided metadata range. 
+  void print(raw_ostream &os) const;
+
+  /// Join two strided metadata ranges by taking the element-wise union of the
+  /// metadata.
+  static StridedMetadataRange join(const StridedMetadataRange &lhs,
+                                   const StridedMetadataRange &rhs) {
+    if (lhs.isUninitialized())
+      return rhs;
+    if (rhs.isUninitialized())
+      return lhs;
+
+    // Helper function to compute the range union of constant ranges.
+    auto rangeUnion =
+        +[](const std::tuple<ConstantIntRanges, ConstantIntRanges> &lhsRhs)
+        -> ConstantIntRanges {
+      return std::get<0>(lhsRhs).rangeUnion(std::get<1>(lhsRhs));
+    };
+
+    // Get the elementwise range union. Note that `zip_equal` will assert if
+    // the sizes are not equal.
+    SmallVector<ConstantIntRanges> offsets = llvm::map_to_vector(
+        llvm::zip_equal(*lhs.offsets, *rhs.offsets), rangeUnion);
+    SmallVector<ConstantIntRanges> sizes =
+        llvm::map_to_vector(llvm::zip_equal(lhs.sizes, rhs.sizes), rangeUnion);
+    SmallVector<ConstantIntRanges> strides = llvm::map_to_vector(
+        llvm::zip_equal(lhs.strides, rhs.strides), rangeUnion);
+
+    // Return the joined metadata.
+    return StridedMetadataRange(std::move(offsets), std::move(sizes),
+                                std::move(strides));
+  }
+
+private:
+  /// Create a strided metadata range with the given offsets, sizes, and
+  /// strides.
+  StridedMetadataRange(SmallVectorImpl<ConstantIntRanges> &&offsets,
+                       SmallVectorImpl<ConstantIntRanges> &&sizes,
+                       SmallVectorImpl<ConstantIntRanges> &&strides)
+      : offsets(std::move(offsets)), sizes(std::move(sizes)),
+        strides(std::move(strides)) {}
+
+  /// The offsets range.
+  std::optional<SmallVector<ConstantIntRanges>> offsets;
+
+  /// The sizes ranges.
+  SmallVector<ConstantIntRanges> sizes;
+
+  /// The strides ranges.
+  SmallVector<ConstantIntRanges> strides;
+};
+
+/// Print the strided metadata to `os`.
+inline raw_ostream &operator<<(raw_ostream &os,
+                               const StridedMetadataRange &range) {
+  range.print(os);
+  return os;
+}
+
+/// Callback function type for setting the strided metadata of a value.
+using SetStridedMetadataRangeFn =
+    function_ref<void(Value, const StridedMetadataRange &)>;
+} // end namespace mlir
+
+#include "mlir/Interfaces/InferStridedMetadataInterface.h.inc"
+
+#endif // MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H
diff --git a/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td
new file mode 100644
index 0000000..ee5b094
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td
@@ -0,0 +1,45 @@
+//===- InferStridedMetadataInterface.td - Strided MD Inference ----------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the interface for strided metadata range analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE
+#define MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def InferStridedMetadataOpInterface :
+    OpInterface<"InferStridedMetadataOpInterface"> {
+  let description = [{
+    Allows operations to participate in strided metadata analysis by providing
+    methods that allow them to specify bounds on offsets, sizes, and strides
+    of their result(s) given bounds on their input(s) if known.
+  }];
+  let cppNamespace = "::mlir";
+
+  let methods = [
+    InterfaceMethod<[{
+      Infer the strided metadata bounds on the results of this op given
+      the bounds on its operands.
+      For each result value or block argument of interest, the method should
+      call `setMetadata` with that `Value` as an argument.
+      The `operands` parameter contains the strided metadata ranges for all the
+      operands of the operation in order.
+      The `getIntRange` callback is provided for obtaining the int-range
+      analysis result for a given value.
+    }],
+    "void", "inferStridedMetadataRanges",
+    (ins "::llvm::ArrayRef<::mlir::StridedMetadataRange>":$operands,
+         "::mlir::GetIntRangeFn":$getIntRange,
+         "::mlir::SetStridedMetadataRangeFn":$setMetadata,
+         "int32_t":$indexBitwidth)>
+  ];
+}
+#endif // MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE
diff --git a/mlir/include/mlir/Remark/RemarkStreamer.h b/mlir/include/mlir/Remark/RemarkStreamer.h
index 170d6b4..19a70fa 100644
--- a/mlir/include/mlir/Remark/RemarkStreamer.h
+++ b/mlir/include/mlir/Remark/RemarkStreamer.h
@@ -45,6 +45,7 @@ namespace mlir::remark {
 /// mlir::emitRemarks.
 LogicalResult enableOptimizationRemarksWithLLVMStreamer(
     MLIRContext &ctx, StringRef filePath, llvm::remarks::Format fmt,
+    std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy,
     const RemarkCategories &cat, bool printAsEmitRemarks = false);
 
 } // namespace mlir::remark
diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
index 0fbe15f..b739438 100644
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@@ -44,6 +44,11 @@ enum class RemarkFormat {
   REMARK_FORMAT_BITSTREAM,
 };
 
+enum class RemarkPolicy {
+  REMARK_POLICY_ALL,
+  REMARK_POLICY_FINAL,
+};
+
 /// Configuration options for the mlir-opt tool.
 /// This is intended to help building tools like mlir-opt by collecting the
 /// supported options.
@@ -242,6 +247,8 @@ public:
   /// Set the reproducer output filename
   RemarkFormat getRemarkFormat() const { return remarkFormatFlag; }
+  /// Get the remark policy to use.
+  RemarkPolicy getRemarkPolicy() const { return remarkPolicyFlag; }
   /// Set the remark format to use.
   std::string getRemarksAllFilter() const { return remarksAllFilterFlag; }
   /// Set the remark output file.
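For orientation, the getter above is consumed when enabling remarks on a context; a minimal sketch of that wiring follows (the header locations and the `RemarkCategories` filter values are assumptions, and the policy classes are defined later in this patch):

// Sketch only: map the configured RemarkPolicy onto a remark emitting policy
// and enable remarks on the context. The RemarkCategories field values below
// are placeholders; their exact types follow the definition in Remarks.h.
static llvm::LogicalResult
setupRemarks(mlir::MLIRContext &ctx, const mlir::MlirOptMainConfig &config) {
  mlir::remark::RemarkCategories cats{/*all=*/"", /*passed=*/"",
                                      /*missed=*/"", /*analyse=*/"",
                                      /*failed=*/""};
  std::unique_ptr<mlir::remark::detail::RemarkEmittingPolicyBase> policy;
  if (config.getRemarkPolicy() == mlir::RemarkPolicy::REMARK_POLICY_ALL)
    policy = std::make_unique<mlir::remark::RemarkEmittingPolicyAll>();
  else
    policy = std::make_unique<mlir::remark::RemarkEmittingPolicyFinal>();
  // A null streamer with printAsEmitRemarks prints remarks as diagnostics.
  return mlir::remark::enableOptimizationRemarks(
      ctx, /*streamer=*/nullptr, std::move(policy), cats,
      /*printAsEmitRemarks=*/true);
}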
@@ -265,6 +272,8 @@ protected:
 
   /// Remark format
   RemarkFormat remarkFormatFlag = RemarkFormat::REMARK_FORMAT_STDOUT;
+  /// Remark policy
+  RemarkPolicy remarkPolicyFlag = RemarkPolicy::REMARK_POLICY_ALL;
   /// Remark file to output to
   std::string remarksOutputFileFlag = "";
   /// Remark filters
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index 609cb34..db10ebc 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -40,6 +40,7 @@ add_mlir_library(MLIRAnalysis
   DataFlow/IntegerRangeAnalysis.cpp
   DataFlow/LivenessAnalysis.cpp
   DataFlow/SparseAnalysis.cpp
+  DataFlow/StridedMetadataRangeAnalysis.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Analysis
@@ -53,6 +54,7 @@ add_mlir_library(MLIRAnalysis
   MLIRDataLayoutInterfaces
   MLIRFunctionInterfaces
   MLIRInferIntRangeInterface
+  MLIRInferStridedMetadataInterface
   MLIRInferTypeOpInterface
   MLIRLoopLikeInterface
   MLIRPresburger
diff --git a/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp
new file mode 100644
index 0000000..01c9daf
--- /dev/null
+++ b/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp
@@ -0,0 +1,127 @@
+//===- StridedMetadataRangeAnalysis.cpp - Strided metadata analysis -----*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the dataflow analysis class for strided metadata range
+// inference, which computes bounds on the offsets, sizes, and strides of
+// ranked memref values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Support/DebugStringHelper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
+
+#define DEBUG_TYPE "strided-metadata-range-analysis"
+
+using namespace mlir;
+using namespace mlir::dataflow;
+
+/// Get the entry state for a value. For any value that is not a ranked memref,
+/// this function sets the metadata to a top state with no offsets, sizes, or
+/// strides. For `memref` types, this function will use the metadata in the type
+/// to try to deduce as much information as possible.
+static StridedMetadataRange getEntryStateImpl(Value v, int32_t indexBitwidth) {
+  // TODO: generalize this method with a type interface.
+  auto mTy = dyn_cast<BaseMemRefType>(v.getType());
+
+  // If not a memref or it's un-ranked, don't infer any metadata.
+  if (!mTy || !mTy.hasRank())
+    return StridedMetadataRange::getMaxRanges(indexBitwidth, 0, 0, 0);
+
+  // Get the top state.
+  auto metadata =
+      StridedMetadataRange::getMaxRanges(indexBitwidth, mTy.getRank());
+
+  // Compute the offset and strides.
+  int64_t offset;
+  SmallVector<int64_t> strides;
+  if (failed(cast<MemRefType>(mTy).getStridesAndOffset(strides, offset)))
+    return metadata;
+
+  // Refine the metadata if we know it from the type.
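+  // For example, memref<4x?xf32, strided<[?, 1], offset: 8>> refines the
+  // offset to [8, 8], the outer size to [4, 4], and the inner stride to
+  // [1, 1]; the dynamic size and stride keep their maximum ranges.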
+  if (!ShapedType::isDynamic(offset)) {
+    metadata.getOffsets()[0] =
+        ConstantIntRanges::constant(APInt(indexBitwidth, offset));
+  }
+  for (auto &&[size, range] :
+       llvm::zip_equal(mTy.getShape(), metadata.getSizes())) {
+    if (ShapedType::isDynamic(size))
+      continue;
+    range = ConstantIntRanges::constant(APInt(indexBitwidth, size));
+  }
+  for (auto &&[stride, range] :
+       llvm::zip_equal(strides, metadata.getStrides())) {
+    if (ShapedType::isDynamic(stride))
+      continue;
+    range = ConstantIntRanges::constant(APInt(indexBitwidth, stride));
+  }
+
+  return metadata;
+}
+
+StridedMetadataRangeAnalysis::StridedMetadataRangeAnalysis(
+    DataFlowSolver &solver, int32_t indexBitwidth)
+    : SparseForwardDataFlowAnalysis(solver), indexBitwidth(indexBitwidth) {
+  assert(indexBitwidth > 0 && "invalid bitwidth");
+}
+
+void StridedMetadataRangeAnalysis::setToEntryState(
+    StridedMetadataRangeLattice *lattice) {
+  propagateIfChanged(lattice, lattice->join(getEntryStateImpl(
+                                  lattice->getAnchor(), indexBitwidth)));
+}
+
+LogicalResult StridedMetadataRangeAnalysis::visitOperation(
+    Operation *op, ArrayRef<const StridedMetadataRangeLattice *> operands,
+    ArrayRef<StridedMetadataRangeLattice *> results) {
+  auto inferrable = dyn_cast<InferStridedMetadataOpInterface>(op);
+
+  // Bail if we cannot reason about the op.
+  if (!inferrable) {
+    setAllToEntryStates(results);
+    return success();
+  }
+
+  LDBG() << "Inferring metadata for: "
+         << OpWithFlags(op, OpPrintingFlags().skipRegions());
+
+  // Helper function to retrieve int range values.
+  auto getIntRange = [&](Value value) -> IntegerValueRange {
+    auto lattice = getOrCreateFor<IntegerValueRangeLattice>(
+        getProgramPointAfter(op), value);
+    return lattice ? lattice->getValue() : IntegerValueRange();
+  };
+
+  // Convert the argument lattices to a vector.
+  SmallVector<StridedMetadataRange> argRanges = llvm::map_to_vector(
+      operands, [](const StridedMetadataRangeLattice *lattice) {
+        return lattice->getValue();
+      });
+
+  // Callback to set metadata on a result.
+  auto joinCallback = [&](Value v, const StridedMetadataRange &md) {
+    auto result = cast<OpResult>(v);
+    assert(llvm::is_contained(op->getResults(), result));
+    LDBG() << "- Inferred metadata: " << md;
+    StridedMetadataRangeLattice *lattice = results[result.getResultNumber()];
+    ChangeResult changed = lattice->join(md);
+    LDBG() << "- Joined metadata: " << lattice->getValue();
+    propagateIfChanged(lattice, changed);
+  };
+
+  // Infer the metadata.
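+  // Implementations receive the operand metadata, the int-range lookup, and
+  // the join callback defined above, and invoke the callback for each result
+  // whose metadata they can refine.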
+ inferrable.inferStridedMetadataRanges(argRanges, getIntRange, joinCallback, + indexBitwidth); + return success(); +} diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index 71986f8..bebf1b8 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -40,6 +40,7 @@ add_subdirectory(MathToLibm) add_subdirectory(MathToLLVM) add_subdirectory(MathToROCDL) add_subdirectory(MathToSPIRV) +add_subdirectory(MathToXeVM) add_subdirectory(MemRefToEmitC) add_subdirectory(MemRefToLLVM) add_subdirectory(MemRefToSPIRV) diff --git a/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt b/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt new file mode 100644 index 0000000..050c0ed --- /dev/null +++ b/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt @@ -0,0 +1,22 @@ +add_mlir_conversion_library(MLIRMathToXeVM + MathToXeVM.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/MathToXeVM + + DEPENDS + MLIRConversionPassIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRArithAttrToLLVMConversion + MLIRArithDialect + MLIRLLVMCommonConversion + MLIRLLVMDialect + MLIRMathDialect + MLIRXeVMDialect + MLIRPass + MLIRTransforms + ) diff --git a/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp b/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp new file mode 100644 index 0000000..0fe31d0 --- /dev/null +++ b/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp @@ -0,0 +1,167 @@ +//===-- MathToXeVM.cpp - conversion from Math to XeVM ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/MathToXeVM/MathToXeVM.h" +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/FormatVariadic.h" + +namespace mlir { +#define GEN_PASS_DEF_CONVERTMATHTOXEVM +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +using namespace mlir; + +#define DEBUG_TYPE "math-to-xevm" + +/// Convert math ops marked with `fast` (`afn`) to native OpenCL intrinsics. +template <typename Op> +struct ConvertNativeFuncPattern final : public OpConversionPattern<Op> { + + ConvertNativeFuncPattern(MLIRContext *context, StringRef nativeFunc, + PatternBenefit benefit = 1) + : OpConversionPattern<Op>(context, benefit), nativeFunc(nativeFunc) {} + + LogicalResult + matchAndRewrite(Op op, typename Op::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (!isSPIRVCompatibleFloatOrVec(op.getType())) + return failure(); + + arith::FastMathFlags fastFlags = op.getFastmath(); + if (!arith::bitEnumContainsAll(fastFlags, arith::FastMathFlags::afn)) + return rewriter.notifyMatchFailure(op, "not a fastmath `afn` operation"); + + SmallVector<Type, 1> operandTypes; + for (auto operand : adaptor.getOperands()) { + Type opTy = operand.getType(); + // This pass only supports operations on vectors that are already in SPIRV + // supported vector sizes: Distributing unsupported vector sizes to SPIRV + // supported vector sizes are done in other blocking optimization passes. 
+      if (!isSPIRVCompatibleFloatOrVec(opTy))
+        return rewriter.notifyMatchFailure(
+            op, llvm::formatv("incompatible operand type: '{0}'", opTy));
+      operandTypes.push_back(opTy);
+    }
+
+    auto moduleOp = op->template getParentWithTrait<OpTrait::SymbolTable>();
+    auto funcOpRes = LLVM::lookupOrCreateFn(
+        rewriter, moduleOp, getMangledNativeFuncName(operandTypes),
+        operandTypes, op.getType());
+    assert(!failed(funcOpRes));
+    LLVM::LLVMFuncOp funcOp = funcOpRes.value();
+
+    auto callOp = rewriter.replaceOpWithNewOp<LLVM::CallOp>(
+        op, funcOp, adaptor.getOperands());
+    // Preserve fastmath flags in our MLIR op when converting to llvm function
+    // calls, in order to allow further fastmath optimizations: we thus need to
+    // convert arith fastmath attrs into attrs recognized by llvm.
+    arith::AttrConvertFastMathToLLVM<Op, LLVM::CallOp> fastAttrConverter(op);
+    mlir::NamedAttribute fastAttr = fastAttrConverter.getAttrs()[0];
+    callOp->setAttr(fastAttr.getName(), fastAttr.getValue());
+    return success();
+  }
+
+  inline bool isSPIRVCompatibleFloatOrVec(Type type) const {
+    if (type.isFloat())
+      return true;
+    if (auto vecType = dyn_cast<VectorType>(type)) {
+      if (!vecType.getElementType().isFloat())
+        return false;
+      // SPIRV distinguishes between vectors and matrices: OpenCL native math
+      // intrinsics are not compatible with matrices.
+      ArrayRef<int64_t> shape = vecType.getShape();
+      if (shape.size() != 1)
+        return false;
+      // SPIRV only allows vectors of size 2, 3, 4, 8, 16.
+      if (shape[0] == 2 || shape[0] == 3 || shape[0] == 4 || shape[0] == 8 ||
+          shape[0] == 16)
+        return true;
+    }
+    return false;
+  }
+
+  inline std::string
+  getMangledNativeFuncName(const ArrayRef<Type> operandTypes) const {
+    std::string mangledFuncName =
+        "_Z" + std::to_string(nativeFunc.size()) + nativeFunc.str();
+
+    auto appendFloatToMangledFunc = [&mangledFuncName](Type type) {
+      if (type.isF32())
+        mangledFuncName += "f";
+      else if (type.isF16())
+        mangledFuncName += "Dh";
+      else if (type.isF64())
+        mangledFuncName += "d";
+    };
+
+    for (auto type : operandTypes) {
+      if (auto vecType = dyn_cast<VectorType>(type)) {
+        mangledFuncName += "Dv" + std::to_string(vecType.getShape()[0]) + "_";
+        appendFloatToMangledFunc(vecType.getElementType());
+      } else
+        appendFloatToMangledFunc(type);
+    }
+
+    return mangledFuncName;
+  }
+
+  const StringRef nativeFunc;
+};
+
+void mlir::populateMathToXeVMConversionPatterns(RewritePatternSet &patterns,
+                                                bool convertArith) {
+  patterns.add<ConvertNativeFuncPattern<math::ExpOp>>(patterns.getContext(),
+                                                      "__spirv_ocl_native_exp");
+  patterns.add<ConvertNativeFuncPattern<math::CosOp>>(patterns.getContext(),
+                                                      "__spirv_ocl_native_cos");
+  patterns.add<ConvertNativeFuncPattern<math::Exp2Op>>(
+      patterns.getContext(), "__spirv_ocl_native_exp2");
+  patterns.add<ConvertNativeFuncPattern<math::LogOp>>(patterns.getContext(),
+                                                      "__spirv_ocl_native_log");
+  patterns.add<ConvertNativeFuncPattern<math::Log2Op>>(
+      patterns.getContext(), "__spirv_ocl_native_log2");
+  patterns.add<ConvertNativeFuncPattern<math::Log10Op>>(
+      patterns.getContext(), "__spirv_ocl_native_log10");
+  patterns.add<ConvertNativeFuncPattern<math::PowFOp>>(
+      patterns.getContext(), "__spirv_ocl_native_powr");
+  patterns.add<ConvertNativeFuncPattern<math::RsqrtOp>>(
+      patterns.getContext(), "__spirv_ocl_native_rsqrt");
+  patterns.add<ConvertNativeFuncPattern<math::SinOp>>(patterns.getContext(),
+                                                      "__spirv_ocl_native_sin");
+  patterns.add<ConvertNativeFuncPattern<math::SqrtOp>>(
+      patterns.getContext(), "__spirv_ocl_native_sqrt");
+
patterns.add<ConvertNativeFuncPattern<math::TanOp>>(patterns.getContext(), + "__spirv_ocl_native_tan"); + if (convertArith) + patterns.add<ConvertNativeFuncPattern<arith::DivFOp>>( + patterns.getContext(), "__spirv_ocl_native_divide"); +} + +namespace { +struct ConvertMathToXeVMPass + : public impl::ConvertMathToXeVMBase<ConvertMathToXeVMPass> { + using Base::Base; + void runOnOperation() override; +}; +} // namespace + +void ConvertMathToXeVMPass::runOnOperation() { + RewritePatternSet patterns(&getContext()); + populateMathToXeVMConversionPatterns(patterns, convertArith); + ConversionTarget target(getContext()); + target.addLegalDialect<BuiltinDialect, LLVM::LLVMDialect>(); + if (failed( + applyPartialConversion(getOperation(), target, std::move(patterns)))) + signalPassFailure(); +} diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index a5336ed..00df14b1 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1392,6 +1392,137 @@ public: } }; +// Collapse tensor<1xiN> into tensor<iN> +// E.g. tensor.collapse_shape %arg1 [] : tensor<1xi16> into tensor<i16> +static Value collapse1xNTensorToN(PatternRewriter &rewriter, Value input, + Location loc) { + SmallVector<ReassociationExprs, 1> reassociation; + // Create the collapsed type + auto inputType = cast<RankedTensorType>(input.getType()); + auto elemType = inputType.getElementType(); + auto collapsedType = RankedTensorType::get({}, elemType); + // Emit the collapse op + return rewriter.create<tensor::CollapseShapeOp>(loc, collapsedType, input, + reassociation); +} + +static llvm::SmallVector<int8_t> +convertToI8(const llvm::SmallVector<int32_t> &input) { + llvm::SmallVector<int8_t> output; + output.reserve(input.size()); + + for (auto v : llvm::map_range( + input, [](int32_t val) { return static_cast<int8_t>(val); })) { + output.push_back(v); + } + return output; +} + +// The shift or multiplier may be either constant or non-constant, depending on +// whether dynamic extension is enabled. +// - If the shift or multiplier is non-constant, add it as an input to +// linalg::GenericOp by: +// 1. Pushing it into 'genericInputs'. +// 2. Appending a corresponding affine map to 'indexingMaps'. +// - If the shift or multiplier is constant, set 'constant' instead. +static void setupLinalgGenericOpInputAndIndexingMap( + PatternRewriter &rewriter, llvm::SmallVector<int32_t> &values, + SmallVector<Value, 4> &genericInputs, SmallVector<AffineMap> &indexingMaps, + bool isConstant, tosa::RescaleOp op, Value &constant, int64_t &arg, + bool isShift = false) { + + auto loc = op.getLoc(); + auto inputTy = cast<ShapedType>(op.getInput().getType()); + unsigned rank = inputTy.getRank(); + SmallVector<AffineExpr, 2> exprs = {rewriter.getAffineDimExpr(rank - 1)}; + + if (isConstant) { + // If we are rescaling per-channel then we need to store the + // values in a buffer. + if (values.size() == 1) { + IntegerAttr intAttr = isShift + ? rewriter.getI8IntegerAttr(values.front()) + : rewriter.getI32IntegerAttr(values.front()); + constant = rewriter.create<arith::ConstantOp>(loc, intAttr); + } else { + auto elementType = + isShift ? 
rewriter.getIntegerType(8) : rewriter.getI32Type();
+      auto tensorType = RankedTensorType::get(
+          {static_cast<int64_t>(values.size())}, elementType);
+      DenseIntElementsAttr eltAttr;
+      if (isShift)
+        eltAttr = DenseIntElementsAttr::get(tensorType, convertToI8(values));
+      else
+        eltAttr = DenseIntElementsAttr::get(tensorType, values);
+      genericInputs.push_back(
+          arith::ConstantOp::create(rewriter, loc, eltAttr));
+      indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank,
+                                            /*symbolCount=*/0, exprs,
+                                            rewriter.getContext()));
+    }
+  } else {
+    // If we are not rescaling per-channel then we need to collapse 1xN to N
+    // and push broadcastMap.
+    auto operand = isShift ? op.getShift() : op.getMultiplier();
+    auto tensorType = dyn_cast<RankedTensorType>(operand.getType());
+    if (tensorType && tensorType.hasStaticShape() &&
+        tensorType.getShape()[0] == 1) {
+      // broadcastMap = affine_map<(d0, d1) -> ()>
+      // It acts as a broadcast for scalar values in linalg::GenericOp.
+      AffineMap broadcastMap =
+          AffineMap::get(rank, 0, {}, rewriter.getContext());
+      genericInputs.push_back(collapse1xNTensorToN(rewriter, operand, loc));
+      indexingMaps.push_back(broadcastMap);
+    } else {
+      genericInputs.push_back(operand);
+      indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank,
+                                            /*symbolCount=*/0, exprs,
+                                            rewriter.getContext()));
+    }
+  }
+  arg = indexingMaps.size() - 1;
+}
+
+// Return the extended Zp to be used in subsequent arithmetic operations.
+static Value getExtendZp(OpBuilder &builder, Type valueTy,
+                         FailureOr<int64_t> maybeZp, Location loc,
+                         ValueRange blockArgs, int64_t zpArg,
+                         bool isOutputZp = false) {
+  Value result;
+  const int32_t bitwidth = valueTy.getIntOrFloatBitWidth();
+  const uint32_t attrBitwidth =
+      isOutputZp ? 32 : (bitwidth > 32 ? bitwidth : 32);
+  auto extendType = builder.getIntegerType(attrBitwidth);
+  // The Zp value can be either constant or non-constant, depending on
+  // whether dynamic extension is enabled.
+  // If 'maybeZp' fails, it indicates that Zp is non-constant and will
+  // be passed as an input to linalg::GenericOp.
+  if (failed(maybeZp)) {
+    result = blockArgs[zpArg];
+    auto zpTy = result.getType();
+    if (zpTy.getIntOrFloatBitWidth() < attrBitwidth) {
+      // For ExtUIOp, the input must be signless.
+      // UnrealizedConversionCastOp will cast the input to signless type.
+      if (zpTy.isUnsignedInteger()) {
+        result =
+            UnrealizedConversionCastOp::create(
+                builder, loc,
+                builder.getIntegerType(zpTy.getIntOrFloatBitWidth()), result)
+                .getResult(0);
+      }
+      if (zpTy.isUnsignedInteger()) {
+        return builder.create<arith::ExtUIOp>(loc, extendType, result);
+      } else {
+        return builder.create<arith::ExtSIOp>(loc, extendType, result);
+      }
+    }
+  } else {
+    return builder.create<arith::ConstantOp>(
+        loc, IntegerAttr::get(extendType, *maybeZp));
+  }
+  return result;
+}
+
 class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
 public:
   using OpRewritePattern<tosa::RescaleOp>::OpRewritePattern;
 
@@ -1423,40 +1554,46 @@
       }
     }
 
-    // The shift and multiplier values.
DenseElementsAttr shiftElems; - if (!matchPattern(op.getShift(), m_Constant(&shiftElems))) - return rewriter.notifyMatchFailure( - op, "tosa.rescale requires constant shift input values"); + bool isShiftConstant = false; + if (matchPattern(op.getShift(), m_Constant(&shiftElems))) + isShiftConstant = true; DenseElementsAttr multiplierElems; - if (!matchPattern(op.getMultiplier(), m_Constant(&multiplierElems))) - return rewriter.notifyMatchFailure( - op, "tosa.rescale requires constant multiplier input values"); - - llvm::SmallVector<int8_t> shiftValues = - llvm::to_vector(shiftElems.getValues<int8_t>()); - // explicit cast is required here - llvm::SmallVector<int32_t> multiplierValues = llvm::to_vector( - llvm::map_range(multiplierElems.getValues<IntegerAttr>(), - [](IntegerAttr attr) -> int32_t { - return static_cast<int32_t>(attr.getInt()); - })); - - // If we shift by more than the bitwidth, this just sets to 0. - for (int i = 0, s = multiplierValues.size(); i < s; i++) { - if (shiftValues[i] > 63) { - shiftValues[i] = 0; - multiplierValues[i] = 0; + bool isMultiplierConstant = false; + if (matchPattern(op.getMultiplier(), m_Constant(&multiplierElems))) + isMultiplierConstant = true; + + llvm::SmallVector<int32_t> shiftValues; + llvm::SmallVector<int32_t> multiplierValues; + bool doubleRound; + + if (isMultiplierConstant && isShiftConstant) { + // explicit cast is required here + shiftValues = llvm::to_vector(llvm::map_range( + shiftElems.getValues<IntegerAttr>(), [](IntegerAttr attr) -> int32_t { + return static_cast<int32_t>(attr.getInt()); + })); + multiplierValues = llvm::to_vector( + llvm::map_range(multiplierElems.getValues<IntegerAttr>(), + [](IntegerAttr attr) -> int32_t { + return static_cast<int32_t>(attr.getInt()); + })); + + // If we shift by more than the bitwidth, this just sets to 0. + for (int i = 0, s = multiplierValues.size(); i < s; i++) { + if (shiftValues[i] > 63) { + shiftValues[i] = 0; + multiplierValues[i] = 0; + } } - } + // Double round only occurs if shift is greater than 31, check that this + // is ever true. + doubleRound = op.getRoundingMode() == RoundingMode::DOUBLE_ROUND && + llvm::any_of(shiftValues, [](int32_t v) { return v > 31; }); + } else + doubleRound = op.getRoundingMode() == RoundingMode::DOUBLE_ROUND; - // Double round only occurs if shift is greater than 31, check that this - // is ever true. - - bool doubleRound = - op.getRoundingMode() == RoundingMode::DOUBLE_ROUND && - llvm::any_of(shiftValues, [](int32_t v) { return v > 31; }); RoundingMode roundingMode = doubleRound ? RoundingMode::DOUBLE_ROUND : RoundingMode::SINGLE_ROUND; @@ -1468,45 +1605,43 @@ public: // values in a buffer. 
    Value multiplierConstant;
     int64_t multiplierArg = 0;
-    if (multiplierValues.size() == 1) {
-      multiplierConstant = arith::ConstantOp::create(
-          rewriter, loc, rewriter.getI32IntegerAttr(multiplierValues.front()));
-    } else {
-      SmallVector<AffineExpr, 2> multiplierExprs{
-          rewriter.getAffineDimExpr(rank - 1)};
-      auto multiplierType =
-          RankedTensorType::get({static_cast<int64_t>(multiplierValues.size())},
-                                rewriter.getI32Type());
-      genericInputs.push_back(arith::ConstantOp::create(
-          rewriter, loc,
-          DenseIntElementsAttr::get(multiplierType, multiplierValues)));
-
-      indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank,
-                                            /*symbolCount=*/0, multiplierExprs,
-                                            rewriter.getContext()));
-
-      multiplierArg = indexingMaps.size() - 1;
-    }
+    setupLinalgGenericOpInputAndIndexingMap(
+        rewriter, multiplierValues, genericInputs, indexingMaps,
+        isMultiplierConstant, op, multiplierConstant, multiplierArg);
 
     // If we are rescaling per-channel then we need to store the shift
     // values in a buffer.
     Value shiftConstant;
     int64_t shiftArg = 0;
-    if (shiftValues.size() == 1) {
-      shiftConstant = arith::ConstantOp::create(
-          rewriter, loc, rewriter.getI8IntegerAttr(shiftValues.front()));
-    } else {
-      SmallVector<AffineExpr, 2> shiftExprs = {
-          rewriter.getAffineDimExpr(rank - 1)};
-      auto shiftType =
-          RankedTensorType::get({static_cast<int64_t>(shiftValues.size())},
-                                rewriter.getIntegerType(8));
-      genericInputs.push_back(arith::ConstantOp::create(
-          rewriter, loc, DenseIntElementsAttr::get(shiftType, shiftValues)));
-      indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank,
-                                            /*symbolCount=*/0, shiftExprs,
-                                            rewriter.getContext()));
-      shiftArg = indexingMaps.size() - 1;
+    setupLinalgGenericOpInputAndIndexingMap(
+        rewriter, shiftValues, genericInputs, indexingMaps, isShiftConstant, op,
+        shiftConstant, shiftArg, true);
+
+    // broadcastMap = affine_map<(d0, d1) -> ()>
+    // It acts as a broadcast for scalar values in linalg::GenericOp.
+    AffineMap broadcastMap = AffineMap::get(rank, 0, {}, rewriter.getContext());
+    FailureOr<int64_t> maybeIZp = op.getInputZeroPoint();
+    FailureOr<int64_t> maybeOZp = op.getOutputZeroPoint();
+    // The inputZp and outputZp may be either constant or non-constant,
+    // depending on whether dynamic extension is enabled.
+    // - If the zp's are non-constant, add them as inputs to
+    //   linalg::GenericOp by:
+    //   1. Pushing them into 'genericInputs'.
+    //   2. Appending a corresponding affine map to 'indexingMaps'.
+    // - If the zp's are constant, they are generated as arith.constant.
+    int64_t iZpArg = 0;
+    if (failed(maybeIZp)) {
+      genericInputs.push_back(
+          collapse1xNTensorToN(rewriter, op->getOperand(3), loc));
+      indexingMaps.push_back(broadcastMap);
+      iZpArg = indexingMaps.size() - 1;
+    }
+    int64_t oZpArg = 0;
+    if (failed(maybeOZp)) {
+      genericInputs.push_back(
+          collapse1xNTensorToN(rewriter, op->getOperand(4), loc));
+      indexingMaps.push_back(broadcastMap);
+      oZpArg = indexingMaps.size() - 1;
     }
 
     // Indexing maps for output values.
@@ -1526,36 +1661,17 @@
       Type valueTy = value.getType();
 
       FailureOr<int64_t> maybeIZp = op.getInputZeroPoint();
-      if (failed(maybeIZp)) {
-        (void)rewriter.notifyMatchFailure(
-            op, "input zero point cannot be statically determined");
-        return;
-      }
-
-      const int32_t inBitwidth = valueTy.getIntOrFloatBitWidth();
-      // Extend zeropoint for sub-32bits widths.
-      const int32_t inAttrBitwidth = inBitwidth > 32 ?
inBitwidth : 32; - auto inputZp = arith::ConstantOp::create( - nestedBuilder, loc, - IntegerAttr::get(rewriter.getIntegerType(inAttrBitwidth), - *maybeIZp)); + auto inputZp = getExtendZp(nestedBuilder, valueTy, maybeIZp, + nestedLoc, blockArgs, iZpArg); FailureOr<int64_t> maybeOZp = op.getOutputZeroPoint(); - if (failed(maybeOZp)) { - (void)rewriter.notifyMatchFailure( - op, "output zero point cannot be statically determined"); - return; - }; + auto outputZp = getExtendZp(nestedBuilder, valueTy, maybeOZp, + nestedLoc, blockArgs, oZpArg, true); IntegerType outIntType = cast<IntegerType>(blockArgs.back().getType()); unsigned outBitWidth = outIntType.getWidth(); - const int32_t outAttrBitwidth = 32; assert(outBitWidth <= 32 && "Unexpected output zeropoint bitwidth"); - auto outputZp = arith::ConstantOp::create( - nestedBuilder, loc, - IntegerAttr::get(rewriter.getIntegerType(outAttrBitwidth), - *maybeOZp)); Value multiplier = multiplierConstant ? multiplierConstant : blockArgs[multiplierArg]; diff --git a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp index 624519f..70faa71 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp @@ -64,12 +64,13 @@ mlir::bufferization::dropEquivalentBufferResults(ModuleOp module) { module.walk([&](func::CallOp callOp) { if (func::FuncOp calledFunc = dyn_cast_or_null<func::FuncOp>(callOp.resolveCallable())) { - callerMap[calledFunc].insert(callOp); + if (!calledFunc.isPublic() && !calledFunc.isExternal()) + callerMap[calledFunc].insert(callOp); } }); for (auto funcOp : module.getOps<func::FuncOp>()) { - if (funcOp.isExternal()) + if (funcOp.isExternal() || funcOp.isPublic()) continue; func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); // TODO: Support functions with multiple blocks. 
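A short usage sketch of the transform above (assuming, as in upstream MLIR, that it is declared in the bufferization Passes.h header; the wrapper name is hypothetical):

#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/IR/BuiltinOps.h"

// Drop buffer results that are equivalent to one of the function's operands.
// With this change, public and external functions keep their signatures,
// since callers outside the module may depend on them.
static llvm::LogicalResult simplifyModule(mlir::ModuleOp module) {
  return mlir::bufferization::dropEquivalentBufferResults(module);
}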
diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
index e25a012..1382c7ac 100644
--- a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
@@ -5,7 +5,7 @@ add_mlir_dialect_library(MLIRMemRefDialect
   ValueBoundsOpInterfaceImpl.cpp
 
   ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/inlude/mlir/Dialect/MemRefDialect
+  ${PROJECT_SOURCE_DIR}/include/mlir/Dialect/MemRef/IR
 
   DEPENDS
   MLIRMemRefOpsIncGen
@@ -18,6 +18,7 @@ add_mlir_dialect_library(MLIRMemRefDialect
   MLIRDialectUtils
   MLIRInferIntRangeCommon
   MLIRInferIntRangeInterface
+  MLIRInferStridedMetadataInterface
   MLIRInferTypeOpInterface
   MLIRIR
   MLIRMemOpInterfaces
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index e9bdcda..507597b 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -3437,6 +3437,65 @@ SubViewOp::bubbleDownCasts(OpBuilder &builder) {
   return bubbleDownCastsPassthroughOpImpl(*this, builder, getSourceMutable());
 }
 
+void SubViewOp::inferStridedMetadataRanges(
+    ArrayRef<StridedMetadataRange> ranges, GetIntRangeFn getIntRange,
+    SetStridedMetadataRangeFn setMetadata, int32_t indexBitwidth) {
+  auto isUninitialized =
+      +[](IntegerValueRange range) { return range.isUninitialized(); };
+
+  // Bail early if any of the operands' metadata is not ready:
+  SmallVector<IntegerValueRange> offsetOperands =
+      getIntValueRanges(getMixedOffsets(), getIntRange, indexBitwidth);
+  if (llvm::any_of(offsetOperands, isUninitialized))
+    return;
+
+  SmallVector<IntegerValueRange> sizeOperands =
+      getIntValueRanges(getMixedSizes(), getIntRange, indexBitwidth);
+  if (llvm::any_of(sizeOperands, isUninitialized))
+    return;
+
+  SmallVector<IntegerValueRange> stridesOperands =
+      getIntValueRanges(getMixedStrides(), getIntRange, indexBitwidth);
+  if (llvm::any_of(stridesOperands, isUninitialized))
+    return;
+
+  StridedMetadataRange sourceRange =
+      ranges[getSourceMutable().getOperandNumber()];
+  if (sourceRange.isUninitialized())
+    return;
+
+  ArrayRef<ConstantIntRanges> srcStrides = sourceRange.getStrides();
+
+  // Get the dropped dims.
+  llvm::SmallBitVector droppedDims = getDroppedDims();
+
+  // Compute the new offset, strides and sizes.
+  ConstantIntRanges offset = sourceRange.getOffsets()[0];
+  SmallVector<ConstantIntRanges> strides, sizes;
+
+  for (size_t i = 0, e = droppedDims.size(); i < e; ++i) {
+    bool dropped = droppedDims.test(i);
+    // Compute the new offset.
+    ConstantIntRanges off =
+        intrange::inferMul({offsetOperands[i].getValue(), srcStrides[i]});
+    offset = intrange::inferAdd({offset, off});
+
+    // Skip dropped dimensions.
+    if (dropped)
+      continue;
+    // Multiply the strides.
+    strides.push_back(
+        intrange::inferMul({stridesOperands[i].getValue(), srcStrides[i]}));
+    // Get the sizes.
+ sizes.push_back(sizeOperands[i].getValue()); + } + + setMetadata(getResult(), + StridedMetadataRange::getRanked( + SmallVector<ConstantIntRanges>({std::move(offset)}), + std::move(sizes), std::move(strides))); +} + //===----------------------------------------------------------------------===// // TransposeOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 6564a4e..642ced9 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -17,6 +17,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/SymbolTable.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallSet.h" @@ -74,14 +75,16 @@ struct MemRefPointerLikeModel } mlir::Value genAllocate(Type pointer, OpBuilder &builder, Location loc, - StringRef varName, Type varType, - Value originalVar) const { + StringRef varName, Type varType, Value originalVar, + bool &needsFree) const { auto memrefTy = cast<MemRefType>(pointer); // Check if this is a static memref (all dimensions are known) - if yes // then we can generate an alloca operation. - if (memrefTy.hasStaticShape()) + if (memrefTy.hasStaticShape()) { + needsFree = false; // alloca doesn't need deallocation return memref::AllocaOp::create(builder, loc, memrefTy).getResult(); + } // For dynamic memrefs, extract sizes from the original variable if // provided. Otherwise they cannot be handled. @@ -99,6 +102,7 @@ struct MemRefPointerLikeModel // Note: We only add dynamic sizes to the dynamicSizes array // Static dimensions are handled automatically by AllocOp } + needsFree = true; // alloc needs deallocation return memref::AllocOp::create(builder, loc, memrefTy, dynamicSizes) .getResult(); } @@ -108,10 +112,14 @@ struct MemRefPointerLikeModel } bool genFree(Type pointer, OpBuilder &builder, Location loc, - TypedValue<PointerLikeType> varPtr, Type varType) const { - if (auto memrefValue = dyn_cast<TypedValue<MemRefType>>(varPtr)) { + TypedValue<PointerLikeType> varToFree, Value allocRes, + Type varType) const { + if (auto memrefValue = dyn_cast<TypedValue<MemRefType>>(varToFree)) { + // Use allocRes if provided to determine the allocation type + Value valueToInspect = allocRes ? allocRes : memrefValue; + // Walk through casts to find the original allocation - Value currentValue = memrefValue; + Value currentValue = valueToInspect; Operation *originalAlloc = nullptr; // Follow the chain of operations to find the original allocation @@ -150,7 +158,7 @@ struct MemRefPointerLikeModel return true; } if (isa<memref::AllocOp>(originalAlloc)) { - // This is an alloc - generate dealloc + // This is an alloc - generate dealloc on varToFree memref::DeallocOp::create(builder, loc, memrefValue); return true; } @@ -1003,6 +1011,142 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> { } }; +//===----------------------------------------------------------------------===// +// Recipe Region Helpers +//===----------------------------------------------------------------------===// + +/// Create and populate an init region for privatization recipes. +/// Returns the init block on success, or nullptr on failure. +/// Sets needsFree to indicate if the allocated memory requires deallocation. 
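+/// When needsFree is set, the caller is expected to also create a matching
+/// destroy region that deallocates the privatized value.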
+static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc, + Type varType, StringRef varName, + ValueRange bounds, + bool &needsFree) { + // Create init block with arguments: original value + bounds + SmallVector<Type> argTypes{varType}; + SmallVector<Location> argLocs{loc}; + for (Value bound : bounds) { + argTypes.push_back(bound.getType()); + argLocs.push_back(loc); + } + + auto initBlock = std::make_unique<Block>(); + initBlock->addArguments(argTypes, argLocs); + builder.setInsertionPointToStart(initBlock.get()); + + Value privatizedValue; + + // Get the block argument that represents the original variable + Value blockArgVar = initBlock->getArgument(0); + + // Generate init region body based on variable type + if (isa<MappableType>(varType)) { + auto mappableTy = cast<MappableType>(varType); + auto typedVar = cast<TypedValue<MappableType>>(blockArgVar); + privatizedValue = mappableTy.generatePrivateInit( + builder, loc, typedVar, varName, bounds, {}, needsFree); + if (!privatizedValue) + return nullptr; + } else { + assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType"); + auto pointerLikeTy = cast<PointerLikeType>(varType); + // Use PointerLikeType's allocation API with the block argument + privatizedValue = pointerLikeTy.genAllocate(builder, loc, varName, varType, + blockArgVar, needsFree); + if (!privatizedValue) + return nullptr; + } + + // Add yield operation to init block + acc::YieldOp::create(builder, loc, privatizedValue); + + return initBlock; +} + +/// Create and populate a copy region for firstprivate recipes. +/// Returns the copy block on success, or nullptr on failure. +/// TODO: Handle MappableType - it does not yet have a copy API. +static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc, + Type varType, + ValueRange bounds) { + // Create copy block with arguments: original value + privatized value + + // bounds + SmallVector<Type> copyArgTypes{varType, varType}; + SmallVector<Location> copyArgLocs{loc, loc}; + for (Value bound : bounds) { + copyArgTypes.push_back(bound.getType()); + copyArgLocs.push_back(loc); + } + + auto copyBlock = std::make_unique<Block>(); + copyBlock->addArguments(copyArgTypes, copyArgLocs); + builder.setInsertionPointToStart(copyBlock.get()); + + bool isMappable = isa<MappableType>(varType); + bool isPointerLike = isa<PointerLikeType>(varType); + // TODO: Handle MappableType - it does not yet have a copy API. + // Otherwise, for now just fallback to pointer-like behavior. + if (isMappable && !isPointerLike) + return nullptr; + + // Generate copy region body based on variable type + if (isPointerLike) { + auto pointerLikeTy = cast<PointerLikeType>(varType); + Value originalArg = copyBlock->getArgument(0); + Value privatizedArg = copyBlock->getArgument(1); + + // Generate copy operation using PointerLikeType interface + if (!pointerLikeTy.genCopy( + builder, loc, cast<TypedValue<PointerLikeType>>(privatizedArg), + cast<TypedValue<PointerLikeType>>(originalArg), varType)) + return nullptr; + } + + // Add terminator to copy block + acc::TerminatorOp::create(builder, loc); + + return copyBlock; +} + +/// Create and populate a destroy region for privatization recipes. +/// Returns the destroy block on success, or nullptr if not needed. 
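+/// `allocRes` is the value yielded by the matching init region; it lets the
+/// pointer-like implementation look through casts and classify the original
+/// allocation when generating the deallocation.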
+static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder, + Location loc, Type varType, + Value allocRes, + ValueRange bounds) { + // Create destroy block with arguments: original value + privatized value + + // bounds + SmallVector<Type> destroyArgTypes{varType, varType}; + SmallVector<Location> destroyArgLocs{loc, loc}; + for (Value bound : bounds) { + destroyArgTypes.push_back(bound.getType()); + destroyArgLocs.push_back(loc); + } + + auto destroyBlock = std::make_unique<Block>(); + destroyBlock->addArguments(destroyArgTypes, destroyArgLocs); + builder.setInsertionPointToStart(destroyBlock.get()); + + bool isMappable = isa<MappableType>(varType); + bool isPointerLike = isa<PointerLikeType>(varType); + // TODO: Handle MappableType - it does not yet have a deallocation API. + // Otherwise, for now just fallback to pointer-like behavior. + if (isMappable && !isPointerLike) + return nullptr; + + assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType"); + auto pointerLikeTy = cast<PointerLikeType>(varType); + auto privatizedArg = + cast<TypedValue<PointerLikeType>>(destroyBlock->getArgument(1)); + // Pass allocRes to help determine the allocation type + if (!pointerLikeTy.genFree(builder, loc, privatizedArg, allocRes, varType)) + return nullptr; + + acc::TerminatorOp::create(builder, loc); + + return destroyBlock; +} + } // namespace //===----------------------------------------------------------------------===// @@ -1050,6 +1194,55 @@ LogicalResult acc::PrivateRecipeOp::verifyRegions() { return success(); } +std::optional<PrivateRecipeOp> +PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc, + StringRef recipeName, Type varType, + StringRef varName, ValueRange bounds) { + // First, validate that we can handle this variable type + bool isMappable = isa<MappableType>(varType); + bool isPointerLike = isa<PointerLikeType>(varType); + + // Unsupported type + if (!isMappable && !isPointerLike) + return std::nullopt; + + // Create init and destroy blocks using shared helpers + OpBuilder::InsertionGuard guard(builder); + + // Save the original insertion point for creating the recipe operation later + auto originalInsertionPoint = builder.saveInsertionPoint(); + + bool needsFree = false; + auto initBlock = + createInitRegion(builder, loc, varType, varName, bounds, needsFree); + if (!initBlock) + return std::nullopt; + + // Only create destroy region if the allocation needs deallocation + std::unique_ptr<Block> destroyBlock; + if (needsFree) { + // Extract the allocated value from the init block's yield operation + auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator()); + Value allocRes = yieldOp.getOperand(0); + + destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds); + if (!destroyBlock) + return std::nullopt; + } + + // Now create the recipe operation at the original insertion point and attach + // the blocks + builder.restoreInsertionPoint(originalInsertionPoint); + auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType); + + // Move the blocks into the recipe's regions + recipe.getInitRegion().push_back(initBlock.release()); + if (destroyBlock) + recipe.getDestroyRegion().push_back(destroyBlock.release()); + + return recipe; +} + //===----------------------------------------------------------------------===// // FirstprivateRecipeOp //===----------------------------------------------------------------------===// @@ -1080,6 +1273,60 @@ LogicalResult acc::FirstprivateRecipeOp::verifyRegions() { return success(); 
} +std::optional<FirstprivateRecipeOp> +FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc, + StringRef recipeName, Type varType, + StringRef varName, ValueRange bounds) { + // First, validate that we can handle this variable type + bool isMappable = isa<MappableType>(varType); + bool isPointerLike = isa<PointerLikeType>(varType); + + // Unsupported type + if (!isMappable && !isPointerLike) + return std::nullopt; + + // Create init, copy, and destroy blocks using shared helpers + OpBuilder::InsertionGuard guard(builder); + + // Save the original insertion point for creating the recipe operation later + auto originalInsertionPoint = builder.saveInsertionPoint(); + + bool needsFree = false; + auto initBlock = + createInitRegion(builder, loc, varType, varName, bounds, needsFree); + if (!initBlock) + return std::nullopt; + + auto copyBlock = createCopyRegion(builder, loc, varType, bounds); + if (!copyBlock) + return std::nullopt; + + // Only create destroy region if the allocation needs deallocation + std::unique_ptr<Block> destroyBlock; + if (needsFree) { + // Extract the allocated value from the init block's yield operation + auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator()); + Value allocRes = yieldOp.getOperand(0); + + destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds); + if (!destroyBlock) + return std::nullopt; + } + + // Now create the recipe operation at the original insertion point and attach + // the blocks + builder.restoreInsertionPoint(originalInsertionPoint); + auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType); + + // Move the blocks into the recipe's regions + recipe.getInitRegion().push_back(initBlock.release()); + recipe.getCopyRegion().push_back(copyBlock.release()); + if (destroyBlock) + recipe.getDestroyRegion().push_back(destroyBlock.release()); + + return recipe; +} + //===----------------------------------------------------------------------===// // ReductionRecipeOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 1fa04ed..89b81cf 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -121,6 +121,11 @@ namespace mlir { class MLIRContextImpl { public: //===--------------------------------------------------------------------===// + // Remark + //===--------------------------------------------------------------------===// + std::unique_ptr<remark::detail::RemarkEngine> remarkEngine; + + //===--------------------------------------------------------------------===// // Debugging //===--------------------------------------------------------------------===// @@ -135,11 +140,6 @@ public: DiagnosticEngine diagEngine; //===--------------------------------------------------------------------===// - // Remark - //===--------------------------------------------------------------------===// - std::unique_ptr<remark::detail::RemarkEngine> remarkEngine; - - //===--------------------------------------------------------------------===// // Options //===--------------------------------------------------------------------===// @@ -357,7 +357,10 @@ MLIRContext::MLIRContext(const DialectRegistry ®istry, Threading setting) impl->affineUniquer.registerParametricStorageType<IntegerSetStorage>(); } -MLIRContext::~MLIRContext() = default; +MLIRContext::~MLIRContext() { + // finalize remark engine before destroying anything else. 
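+  // The engine's destructor finalizes its emitting policy and streamer, and
+  // buffered remarks reference context-owned state such as locations.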
+ impl->remarkEngine.reset(); +} /// Copy the specified array of elements into memory managed by the provided /// bump pointer allocator. This assumes the elements are all PODs. diff --git a/mlir/lib/IR/Remarks.cpp b/mlir/lib/IR/Remarks.cpp index a55f61a..031eae2 100644 --- a/mlir/lib/IR/Remarks.cpp +++ b/mlir/lib/IR/Remarks.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/StringRef.h" using namespace mlir::remark::detail; - +using namespace mlir::remark; //------------------------------------------------------------------------------ // Remark //------------------------------------------------------------------------------ @@ -70,7 +70,7 @@ static void printArgs(llvm::raw_ostream &os, llvm::ArrayRef<Remark::Arg> args) { void Remark::print(llvm::raw_ostream &os, bool printLocation) const { // Header: [Type] pass:remarkName StringRef type = getRemarkTypeString(); - StringRef categoryName = getFullCategoryName(); + StringRef categoryName = getCombinedCategoryName(); StringRef name = remarkName; os << '[' << type << "] "; @@ -81,9 +81,10 @@ void Remark::print(llvm::raw_ostream &os, bool printLocation) const { os << "Function=" << getFunction() << " | "; if (printLocation) { - if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(getLocation())) + if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(getLocation())) { os << " @" << flc.getFilename() << ":" << flc.getLine() << ":" << flc.getColumn(); + } } printArgs(os, getArgs()); @@ -140,7 +141,7 @@ llvm::remarks::Remark Remark::generateRemark() const { r.RemarkType = getRemarkType(); r.RemarkName = getRemarkName(); // MLIR does not use passes; instead, it has categories and sub-categories. - r.PassName = getFullCategoryName(); + r.PassName = getCombinedCategoryName(); r.FunctionName = getFunction(); r.Loc = locLambda(); for (const Remark::Arg &arg : getArgs()) { @@ -225,26 +226,42 @@ InFlightRemark RemarkEngine::emitOptimizationRemarkAnalysis(Location loc, // RemarkEngine //===----------------------------------------------------------------------===// -void RemarkEngine::report(const Remark &&remark) { +void RemarkEngine::reportImpl(const Remark &remark) { // Stream the remark - if (remarkStreamer) + if (remarkStreamer) { remarkStreamer->streamOptimizationRemark(remark); + } // Print using MLIR's diagnostic if (printAsEmitRemarks) emitRemark(remark.getLocation(), remark.getMsg()); } +void RemarkEngine::report(const Remark &&remark) { + if (remarkEmittingPolicy) + remarkEmittingPolicy->reportRemark(remark); +} + RemarkEngine::~RemarkEngine() { + if (remarkEmittingPolicy) + remarkEmittingPolicy->finalize(); + if (remarkStreamer) remarkStreamer->finalize(); } -llvm::LogicalResult -RemarkEngine::initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer, - std::string *errMsg) { - // If you need to validate categories/filters, do so here and set errMsg. 
+llvm::LogicalResult RemarkEngine::initialize( + std::unique_ptr<MLIRRemarkStreamerBase> streamer, + std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy, + std::string *errMsg) { + remarkStreamer = std::move(streamer); + + auto reportFunc = + std::bind(&RemarkEngine::reportImpl, this, std::placeholders::_1); + remarkEmittingPolicy->initialize(ReportFn(std::move(reportFunc))); + + this->remarkEmittingPolicy = std::move(remarkEmittingPolicy); return success(); } @@ -301,14 +318,15 @@ RemarkEngine::RemarkEngine(bool printAsEmitRemarks, } llvm::LogicalResult mlir::remark::enableOptimizationRemarks( - MLIRContext &ctx, - std::unique_ptr<remark::detail::MLIRRemarkStreamerBase> streamer, - const remark::RemarkCategories &cats, bool printAsEmitRemarks) { + MLIRContext &ctx, std::unique_ptr<detail::MLIRRemarkStreamerBase> streamer, + std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy, + const RemarkCategories &cats, bool printAsEmitRemarks) { auto engine = - std::make_unique<remark::detail::RemarkEngine>(printAsEmitRemarks, cats); + std::make_unique<detail::RemarkEngine>(printAsEmitRemarks, cats); std::string errMsg; - if (failed(engine->initialize(std::move(streamer), &errMsg))) { + if (failed(engine->initialize(std::move(streamer), + std::move(remarkEmittingPolicy), &errMsg))) { llvm::report_fatal_error( llvm::Twine("Failed to initialize remark engine. Error: ") + errMsg); } @@ -316,3 +334,12 @@ llvm::LogicalResult mlir::remark::enableOptimizationRemarks( return success(); } + +//===----------------------------------------------------------------------===// +// Remark emitting policies +//===----------------------------------------------------------------------===// + +namespace mlir::remark { +RemarkEmittingPolicyAll::RemarkEmittingPolicyAll() = default; +RemarkEmittingPolicyFinal::RemarkEmittingPolicyFinal() = default; +} // namespace mlir::remark diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt index 388de1c..f96af02 100644 --- a/mlir/lib/Interfaces/CMakeLists.txt +++ b/mlir/lib/Interfaces/CMakeLists.txt @@ -9,6 +9,7 @@ set(LLVM_OPTIONAL_SOURCES FunctionInterfaces.cpp IndexingMapOpInterface.cpp InferIntRangeInterface.cpp + InferStridedMetadataInterface.cpp InferTypeOpInterface.cpp LoopLikeInterface.cpp MemOpInterfaces.cpp @@ -64,6 +65,21 @@ add_mlir_library(MLIRFunctionInterfaces add_mlir_interface_library(IndexingMapOpInterface) add_mlir_interface_library(InferIntRangeInterface) + +add_mlir_library(MLIRInferStridedMetadataInterface + InferStridedMetadataInterface.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces + + DEPENDS + MLIRInferStridedMetadataInterfaceIncGen + + LINK_LIBS PUBLIC + MLIRInferIntRangeInterface + MLIRIR +) + add_mlir_interface_library(InferTypeOpInterface) add_mlir_library(MLIRLoopLikeInterface diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp index 9f3e97d..84fc9b8 100644 --- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp +++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp @@ -146,6 +146,25 @@ raw_ostream &mlir::operator<<(raw_ostream &os, const IntegerValueRange &range) { return os; } +SmallVector<IntegerValueRange> +mlir::getIntValueRanges(ArrayRef<OpFoldResult> values, + GetIntRangeFn getIntRange, int32_t indexBitwidth) { + SmallVector<IntegerValueRange> ranges; + ranges.reserve(values.size()); + for (OpFoldResult ofr : values) { + if (auto value = dyn_cast<Value>(ofr)) { + ranges.push_back(getIntRange(value)); + 
continue; + } + + // Create a constant range. + auto attr = cast<IntegerAttr>(cast<Attribute>(ofr)); + ranges.emplace_back(ConstantIntRanges::constant( + attr.getValue().sextOrTrunc(indexBitwidth))); + } + return ranges; +} + void mlir::intrange::detail::defaultInferResultRanges( InferIntRangeInterface interface, ArrayRef<IntegerValueRange> argRanges, SetIntLatticeFn setResultRanges) { diff --git a/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp b/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp new file mode 100644 index 0000000..483e9f1 --- /dev/null +++ b/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp @@ -0,0 +1,36 @@ +//===- InferStridedMetadataInterface.cpp - Strided md inference interface -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Interfaces/InferStridedMetadataInterface.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/TypeUtilities.h" +#include <optional> + +using namespace mlir; + +#include "mlir/Interfaces/InferStridedMetadataInterface.cpp.inc" + +void StridedMetadataRange::print(raw_ostream &os) const { + if (isUninitialized()) { + os << "strided_metadata<None>"; + return; + } + os << "strided_metadata<offset = ["; + llvm::interleaveComma(*offsets, os, [&](const ConstantIntRanges &range) { + os << "{" << range << "}"; + }); + os << "], sizes = ["; + llvm::interleaveComma(sizes, os, [&](const ConstantIntRanges &range) { + os << "{" << range << "}"; + }); + os << "], strides = ["; + llvm::interleaveComma(strides, os, [&](const ConstantIntRanges &range) { + os << "{" << range << "}"; + }); + os << "]>"; +} diff --git a/mlir/lib/Remark/RemarkStreamer.cpp b/mlir/lib/Remark/RemarkStreamer.cpp index d213a1a..bf36286 100644 --- a/mlir/lib/Remark/RemarkStreamer.cpp +++ b/mlir/lib/Remark/RemarkStreamer.cpp @@ -60,6 +60,7 @@ void LLVMRemarkStreamer::finalize() { namespace mlir::remark { LogicalResult enableOptimizationRemarksWithLLVMStreamer( MLIRContext &ctx, StringRef path, llvm::remarks::Format fmt, + std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy, const RemarkCategories &cat, bool printAsEmitRemarks) { FailureOr<std::unique_ptr<detail::MLIRRemarkStreamerBase>> sOr = @@ -67,7 +68,8 @@ LogicalResult enableOptimizationRemarksWithLLVMStreamer( if (failed(sOr)) return failure(); - return remark::enableOptimizationRemarks(ctx, std::move(*sOr), cat, + return remark::enableOptimizationRemarks(ctx, std::move(*sOr), + std::move(remarkEmittingPolicy), cat, printAsEmitRemarks); } diff --git a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp index 132be4e..51c6077 100644 --- a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp +++ b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp @@ -956,7 +956,7 @@ inline parsed_inst_t ExpressionParser::buildNumericOp( << ", type = " << ty << " ***"; auto tysToPop = SmallVector<Type, numOperands>(); tysToPop.resize(numOperands); - std::fill(tysToPop.begin(), tysToPop.end(), ty); + llvm::fill(tysToPop, ty); auto operands = popOperands(tysToPop); if (failed(operands)) return failure(); diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index 30fd384..9ef405d 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -37,6 +37,7 @@ 
#include "llvm/ADT/StringRef.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/LogicalResult.h" #include "llvm/Support/ManagedStatic.h" @@ -226,6 +227,18 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig { "bitstream", "Print bitstream file")), llvm::cl::cat(remarkCategory)}; + static llvm::cl::opt<RemarkPolicy, /*ExternalStorage=*/true> remarkPolicy{ + "remark-policy", + llvm::cl::desc("Specify the policy for remark output."), + cl::location(remarkPolicyFlag), + llvm::cl::value_desc("format"), + llvm::cl::init(RemarkPolicy::REMARK_POLICY_ALL), + llvm::cl::values(clEnumValN(RemarkPolicy::REMARK_POLICY_ALL, "all", + "Print all remarks"), + clEnumValN(RemarkPolicy::REMARK_POLICY_FINAL, "final", + "Print final remarks")), + llvm::cl::cat(remarkCategory)}; + static cl::opt<std::string, /*ExternalStorage=*/true> remarksAll( "remarks-filter", cl::desc("Show all remarks: passed, missed, failed, analysis"), @@ -517,18 +530,28 @@ performActions(raw_ostream &os, return failure(); context->enableMultithreading(wasThreadingEnabled); - + // Set the remark categories and policy. remark::RemarkCategories cats{ config.getRemarksAllFilter(), config.getRemarksPassedFilter(), config.getRemarksMissedFilter(), config.getRemarksAnalyseFilter(), config.getRemarksFailedFilter()}; mlir::MLIRContext &ctx = *context; + // Helper to create the appropriate policy based on configuration + auto createPolicy = [&config]() + -> std::unique_ptr<mlir::remark::detail::RemarkEmittingPolicyBase> { + if (config.getRemarkPolicy() == RemarkPolicy::REMARK_POLICY_ALL) + return std::make_unique<mlir::remark::RemarkEmittingPolicyAll>(); + if (config.getRemarkPolicy() == RemarkPolicy::REMARK_POLICY_FINAL) + return std::make_unique<mlir::remark::RemarkEmittingPolicyFinal>(); + + llvm_unreachable("Invalid remark policy"); + }; switch (config.getRemarkFormat()) { case RemarkFormat::REMARK_FORMAT_STDOUT: if (failed(mlir::remark::enableOptimizationRemarks( - ctx, nullptr, cats, true /*printAsEmitRemarks*/))) + ctx, nullptr, createPolicy(), cats, true /*printAsEmitRemarks*/))) return failure(); break; @@ -537,7 +560,7 @@ performActions(raw_ostream &os, ? "mlir-remarks.yaml" : config.getRemarksOutputFile(); if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer( - ctx, file, llvm::remarks::Format::YAML, cats))) + ctx, file, llvm::remarks::Format::YAML, createPolicy(), cats))) return failure(); break; } @@ -547,7 +570,7 @@ performActions(raw_ostream &os, ? "mlir-remarks.bitstream" : config.getRemarksOutputFile(); if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer( - ctx, file, llvm::remarks::Format::Bitstream, cats))) + ctx, file, llvm::remarks::Format::Bitstream, createPolicy(), cats))) return failure(); break; } @@ -593,6 +616,12 @@ performActions(raw_ostream &os, AsmState asmState(op.get(), OpPrintingFlags(), /*locationMap=*/nullptr, &fallbackResourceMap); os << OpWithState(op.get(), asmState) << '\n'; + + // This is required if the remark policy is final. Otherwise, the remarks are + // not emitted. 
+ if (remark::detail::RemarkEngine *engine = ctx.getRemarkEngine()) + engine->getRemarkEmittingPolicy()->finalize(); + return success(); } diff --git a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp index 111f58e..5f3b04a 100644 --- a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp @@ -66,7 +66,9 @@ size_t mlir::moveLoopInvariantCode( size_t numMoved = 0; for (Region *region : regions) { - LDBG() << "Original loop:\n" << *region->getParentOp(); + LDBG() << "Original loop:\n" + << OpWithFlags(region->getParentOp(), + OpPrintingFlags().skipRegions()); std::queue<Operation *> worklist; // Add top-level operations in the loop body to the worklist. @@ -90,7 +92,8 @@ size_t mlir::moveLoopInvariantCode( !canBeHoisted(op, definedOutside)) continue; - LDBG() << "Moving loop-invariant op: " << *op; + LDBG() << "Moving loop-invariant op: " + << OpWithFlags(op, OpPrintingFlags().skipRegions()); moveOutOfRegion(op, region); ++numMoved; @@ -111,9 +114,7 @@ size_t mlir::moveLoopInvariantCode(LoopLikeOpInterface loopLike) { [&](Value value, Region *) { return loopLike.isDefinedOutsideOfLoop(value); }, - [&](Operation *op, Region *) { - return isMemoryEffectFree(op) && isSpeculatable(op); - }, + [&](Operation *op, Region *) { return isPure(op); }, [&](Operation *op, Region *) { loopLike.moveOutOfLoop(op); }); } diff --git a/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir new file mode 100644 index 0000000..808c1c2 --- /dev/null +++ b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt -test-strided-metadata-range-analysis %s 2>&1 | FileCheck %s + +func.func @memref_subview(%arg0: memref<8x16x4xf32, strided<[64, 4, 1]>>, %arg1: memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>>, %arg2: memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>>, %arg3: index, %arg4: index, %arg5: index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %0 = test.with_bounds {smax = 13 : index, smin = 11 : index, umax = 13 : index, umin = 11 : index} : index + %1 = test.with_bounds {smax = 7 : index, smin = 5 : index, umax = 7 : index, umin = 5 : index} : index + + // Test subview with unknown sizes, and constant offsets and strides. + // CHECK: Op: %[[SV0:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [1, 1] signed : [1, 1]}] + // CHECK-SAME: sizes = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: strides = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [4, 4] signed : [4, 4]}, {unsigned : [1, 1] signed : [1, 1]}] + %subview = memref.subview %arg0[%c0, %c0, %c1] [%arg3, %arg4, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[64, 4, 1]>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + + // Test a subview of a subview, with bounded dynamic offsets. 
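+  // As a sanity check on the CHECK values below: the parent %subview has
+  // offset 1 and strides [64, 4, 1], each offset operand %1 lies in [5, 7],
+  // and each stride operand %0 lies in [11, 13], so the derived offset is
+  // 1 + %1*(64 + 4 + 1), i.e. [346, 484], and the derived strides are
+  // [64, 4, 1] scaled by %0.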
+  // CHECK: Op: %[[SV1:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [346, 484] signed : [346, 484]}]
+  // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+  // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+  %subview_0 = memref.subview %subview[%1, %1, %1] [%c2, %c2, %c2] [%0, %0, %0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+  // Test a subview of a subview, with constant operands.
+  // CHECK: Op: %[[SV2:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [368, 510] signed : [368, 510]}]
+  // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+  // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+  %subview_1 = memref.subview %subview_0[%c0, %c0, %c2] [%c2, %c2, %c2] [%c1, %c1, %c1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+  // Test a rank-reducing subview.
+  // CHECK: Op: %[[SV3:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  // CHECK-SAME: sizes = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [16, 16] signed : [16, 16]}]
+  // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  %subview_2 = memref.subview %arg1[%arg4, %arg4, %arg4, %arg4, %arg4] [1, 64, 1, 16, 1] [%arg5, %arg5, %arg5, %arg5, %arg5] : memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>> to memref<64x16xf32, strided<[?, ?], offset: ?>>
+
+  // Test a subview of a rank-reducing subview.
+  // CHECK: Op: %[[SV4:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  // CHECK-SAME: sizes = [{unsigned : [5, 7] signed : [5, 7]}]
+  // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  %subview_3 = memref.subview %subview_2[%c0, %0] [1, %1] [%c1, %c2] : memref<64x16xf32, strided<[?, ?], offset: ?>> to memref<?xf32, strided<[?], offset: ?>>
+
+  // Test a subview with mixed bounded and unbounded dynamic sizes.
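+  // Here the offset is exact: the base %arg2 has offset 16 and strides
+  // [1, 64, 8], and the subview offsets are [0, 0, 2], so the derived
+  // offset is 16 + 2*8 = 32.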
+ // CHECK: Op: %[[SV5:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [32, 32] signed : [32, 32]}] + // CHECK-SAME: sizes = [{unsigned : [11, 13] signed : [11, 13]}, {unsigned : [5, 7] signed : [5, 7]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: strides = [{unsigned : [1, 1] signed : [1, 1]}, {unsigned : [64, 64] signed : [64, 64]}, {unsigned : [8, 8] signed : [8, 8]}] + %subview_4 = memref.subview %arg2[%c0, %c0, %c2] [%0, %1, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + return +} + +// CHECK: func.func @memref_subview +// CHECK: %[[A0:.*]]: memref<8x16x4xf32, strided<[64, 4, 1]>> +// CHECK: %[[SV0]] = memref.subview %[[A0]] +// CHECK-NEXT: %[[SV1]] = memref.subview +// CHECK-NEXT: %[[SV2]] = memref.subview +// CHECK-NEXT: %[[SV3]] = memref.subview +// CHECK-NEXT: %[[SV4]] = memref.subview +// CHECK-NEXT: %[[SV5]] = memref.subview diff --git a/mlir/test/Conversion/MathToXeVM/lit.local.cfg b/mlir/test/Conversion/MathToXeVM/lit.local.cfg new file mode 100644 index 0000000..cc1ce35 --- /dev/null +++ b/mlir/test/Conversion/MathToXeVM/lit.local.cfg @@ -0,0 +1,7 @@ +spirv_backend_tests = [ + 'native-spirv-builtins.mlir', +] + +# Exclude SPIRV backend tests if SPIRV target is disabled: +if(not config.run_xevm_tests): + config.excludes.update(spirv_backend_tests) diff --git a/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir new file mode 100644 index 0000000..d76627b --- /dev/null +++ b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir @@ -0,0 +1,155 @@ +// RUN: mlir-opt %s -convert-math-to-xevm \ +// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-ARITH' +// RUN: mlir-opt %s -convert-math-to-xevm='convert-arith=false' \ +// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-NO-ARITH' + +module @test_module { + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64 + // + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv2_d(vector<2xf64>) -> vector<2xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv3_d(vector<3xf64>) -> vector<3xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_d(vector<4xf64>) -> vector<4xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv8_d(vector<8xf64>) -> vector<8xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_d(vector<16xf64>) -> vector<16xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_f(vector<16xf32>) -> vector<16xf32> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_Dh(vector<4xf16>) -> vector<4xf16> + // + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32 + // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_log10d(f64) -> f64 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_powrDhDh(f16, f16) -> f16 + // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64 + // 
CHECK-ARITH-DAG: llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32 + + // CHECK-LABEL: func @math_ops + func.func @math_ops() { + + %c1_f16 = arith.constant 1. : f16 + %c1_f32 = arith.constant 1. : f32 + %c1_f64 = arith.constant 1. : f64 + + // CHECK: math.exp + %exp_normal_f16 = math.exp %c1_f16 : f16 + // CHECK: math.exp + %exp_normal_f32 = math.exp %c1_f32 : f32 + // CHECK: math.exp + %exp_normal_f64 = math.exp %c1_f64 : f64 + + // Check float operations are converted properly: + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16 + %exp_fast_f16 = math.exp %c1_f16 fastmath<fast> : f16 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f32) -> f32 + %exp_fast_f32 = math.exp %c1_f32 fastmath<fast> : f32 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f64) -> f64 + %exp_fast_f64 = math.exp %c1_f64 fastmath<fast> : f64 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %exp_afn_f16 = math.exp %c1_f16 fastmath<afn> : f16 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %exp_afn_f32 = math.exp %c1_f32 fastmath<afn> : f32 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %exp_afn_f64 = math.exp %c1_f64 fastmath<afn> : f64 + + // CHECK: math.exp + %exp_none_f16 = math.exp %c1_f16 fastmath<none> : f16 + // CHECK: math.exp + %exp_none_f32 = math.exp %c1_f32 fastmath<none> : f32 + // CHECK: math.exp + %exp_none_f64 = math.exp %c1_f64 fastmath<none> : f64 + + // Check vector operations: + + %v2_c1_f64 = arith.constant dense<1.> : vector<2xf64> + %v3_c1_f64 = arith.constant dense<1.> : vector<3xf64> + %v4_c1_f64 = arith.constant dense<1.> : vector<4xf64> + %v8_c1_f64 = arith.constant dense<1.> : vector<8xf64> + %v16_c1_f64 = arith.constant dense<1.> : vector<16xf64> + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv2_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<2xf64>) -> vector<2xf64> + %exp_v2_f64 = math.exp %v2_c1_f64 fastmath<afn> : vector<2xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv3_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<3xf64>) -> vector<3xf64> + %exp_v3_f64 = math.exp %v3_c1_f64 fastmath<afn> : vector<3xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<4xf64>) -> vector<4xf64> + %exp_v4_f64 = math.exp %v4_c1_f64 fastmath<afn> : vector<4xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv8_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64> + %exp_v8_f64 = math.exp %v8_c1_f64 fastmath<afn> : vector<8xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf64>) -> vector<16xf64> + %exp_v16_f64 = math.exp %v16_c1_f64 fastmath<afn> : vector<16xf64> + + %v16_c1_f32 = arith.constant dense<1.> : vector<16xf32> + %v4_c1_f16 = arith.constant dense<1.> : vector<4xf16> + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_f(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<16xf32>) -> vector<16xf32> + %exp_v16_f32 = math.exp %v16_c1_f32 fastmath<fast> : vector<16xf32> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_Dh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<4xf16>) -> vector<4xf16> + 
%exp_v4_f16 = math.exp %v4_c1_f16 fastmath<fast> : vector<4xf16> + + // Check unsupported vector sizes are not converted: + + %v5_c1_f64 = arith.constant dense<1.> : vector<5xf64> + %v32_c1_f64 = arith.constant dense<1.> : vector<32xf64> + + // CHECK: math.exp + %exp_v5_f64 = math.exp %v5_c1_f64 fastmath<afn> : vector<5xf64> + // CHECK: math.exp + %exp_v32_f64 = math.exp %v32_c1_f64 fastmath<afn> : vector<32xf64> + + // Check fastmath flags propagate properly: + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16 + %exp_fastmath_all_f16 = math.exp %c1_f16 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : f16 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, ninf, nsz, arcp, contract, afn>} : (f32) -> f32 + %exp_fastmath_most_f32 = math.exp %c1_f32 fastmath<nnan,ninf,nsz,arcp,contract,afn> : f32 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, afn, reassoc>} : (f32) -> f32 + %exp_afn_reassoc_nnan_f32 = math.exp %c1_f32 fastmath<afn,reassoc,nnan> : f32 + + // Check all other math operations: + + // CHECK: llvm.call @_Z22__spirv_ocl_native_cosDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %cos_afn_f16 = math.cos %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_exp2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %exp2_afn_f32 = math.exp2 %c1_f32 fastmath<afn> : f32 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_logDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %log_afn_f16 = math.log %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_log2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %log2_afn_f32 = math.log2 %c1_f32 fastmath<afn> : f32 + + // CHECK: llvm.call @_Z24__spirv_ocl_native_log10d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %log10_afn_f64 = math.log10 %c1_f64 fastmath<afn> : f64 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_powrDhDh(%{{.*}}, %{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16, f16) -> f16 + %powr_afn_f16 = math.powf %c1_f16, %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z24__spirv_ocl_native_rsqrtd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %rsqrt_afn_f64 = math.rsqrt %c1_f64 fastmath<afn> : f64 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_sinDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %sin_afn_f16 = math.sin %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_sqrtf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %sqrt_afn_f32 = math.sqrt %c1_f32 fastmath<afn> : f32 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_tand(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %tan_afn_f64 = math.tan %c1_f64 fastmath<afn> : f64 + + %c6_9_f32 = arith.constant 6.9 : f32 + %c7_f32 = arith.constant 7. 
: f32
+
+    // CHECK-ARITH: llvm.call @_Z25__spirv_ocl_native_divideff(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
+    // CHECK-NO-ARITH: arith.divf
+    %divf_afn_f32 = arith.divf %c6_9_f32, %c7_f32 fastmath<afn> : f32
+
+    return
+  }
+}
diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
new file mode 100644
index 0000000..2492ada
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
@@ -0,0 +1,118 @@
+// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \
+// RUN:   -debug-only=serialize-to-isa 2> %t
+// RUN: FileCheck --input-file=%t %s
+//
+// The MathToXeVM pass generates OpenCL intrinsic function calls when
+// converting math ops with the `fastmath` attr to native function calls. It
+// is assumed that the SPIRV backend correctly converts these intrinsic calls
+// to OpenCL ExtInst instructions in SPIRV (see
+// llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp).
+//
+// To ensure this assumption holds, this test verifies that the SPIRV backend
+// behaves as expected.
+
+module @test_ocl_intrinsics attributes {gpu.container_module} {
+  gpu.module @kernel [#xevm.target] {
+    llvm.func spir_kernelcc @native_fcns() attributes {gpu.kernel} {
+      // CHECK-DAG: %[[F16T:.+]] = OpTypeFloat 16
+      // CHECK-DAG: %[[ZERO_F16:.+]] = OpConstantNull %[[F16T]]
+      %c0_f16 = llvm.mlir.constant(0. : f16) : f16
+      // CHECK-DAG: %[[F32T:.+]] = OpTypeFloat 32
+      // CHECK-DAG: %[[ZERO_F32:.+]] = OpConstantNull %[[F32T]]
+      %c0_f32 = llvm.mlir.constant(0. : f32) : f32
+      // CHECK-DAG: %[[F64T:.+]] = OpTypeFloat 64
+      // CHECK-DAG: %[[ZERO_F64:.+]] = OpConstantNull %[[F64T]]
+      %c0_f64 = llvm.mlir.constant(0. : f64) : f64
+
+      // CHECK-DAG: %[[V2F64T:.+]] = OpTypeVector %[[F64T]] 2
+      // CHECK-DAG: %[[V2_ZERO_F64:.+]] = OpConstantNull %[[V2F64T]]
+      %v2_c0_f64 = llvm.mlir.constant(dense<0.> : vector<2xf64>) : vector<2xf64>
+      // CHECK-DAG: %[[V3F32T:.+]] = OpTypeVector %[[F32T]] 3
+      // CHECK-DAG: %[[V3_ZERO_F32:.+]] = OpConstantNull %[[V3F32T]]
+      %v3_c0_f32 = llvm.mlir.constant(dense<0.> : vector<3xf32>) : vector<3xf32>
+      // CHECK-DAG: %[[V4F64T:.+]] = OpTypeVector %[[F64T]] 4
+      // CHECK-DAG: %[[V4_ZERO_F64:.+]] = OpConstantNull %[[V4F64T]]
+      %v4_c0_f64 = llvm.mlir.constant(dense<0.> : vector<4xf64>) : vector<4xf64>
+      // CHECK-DAG: %[[V8F64T:.+]] = OpTypeVector %[[F64T]] 8
+      // CHECK-DAG: %[[V8_ZERO_F64:.+]] = OpConstantNull %[[V8F64T]]
+      %v8_c0_f64 = llvm.mlir.constant(dense<0.> : vector<8xf64>) : vector<8xf64>
+      // CHECK-DAG: %[[V16F16T:.+]] = OpTypeVector %[[F16T]] 16
+      // CHECK-DAG: %[[V16_ZERO_F16:.+]] = OpConstantNull %[[V16F16T]]
+      %v16_c0_f16 = llvm.mlir.constant(dense<0.> : vector<16xf16>) : vector<16xf16>
+
+      // CHECK: OpExtInst %[[F16T]] %{{.+}} native_exp %[[ZERO_F16]]
+      %exp_f16 = llvm.call @_Z22__spirv_ocl_native_expDh(%c0_f16) : (f16) -> f16
+      // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp %[[ZERO_F32]]
+      %exp_f32 = llvm.call @_Z22__spirv_ocl_native_expf(%c0_f32) : (f32) -> f32
+      // CHECK: OpExtInst %[[F64T]] %{{.+}} native_exp %[[ZERO_F64]]
+      %exp_f64 = llvm.call @_Z22__spirv_ocl_native_expd(%c0_f64) : (f64) -> f64
+
+      // CHECK: OpExtInst %[[V2F64T]] %{{.+}} native_exp %[[V2_ZERO_F64]]
+      %exp_v2_f64 = llvm.call @_Z22__spirv_ocl_native_expDv2_f64(%v2_c0_f64) : (vector<2xf64>) -> vector<2xf64>
+      // CHECK: OpExtInst %[[V3F32T]] %{{.+}} native_exp %[[V3_ZERO_F32]]
+      %exp_v3_f32 = llvm.call @_Z22__spirv_ocl_native_expDv3_f32(%v3_c0_f32) : (vector<3xf32>) -> vector<3xf32>
+      // CHECK: OpExtInst
%[[V4F64T]] %{{.+}} native_exp %[[V4_ZERO_F64]] + %exp_v4_f64 = llvm.call @_Z22__spirv_ocl_native_expDv4_f64(%v4_c0_f64) : (vector<4xf64>) -> vector<4xf64> + // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_exp %[[V8_ZERO_F64]] + %exp_v8_f64 = llvm.call @_Z22__spirv_ocl_native_expDv8_f64(%v8_c0_f64) : (vector<8xf64>) -> vector<8xf64> + // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_exp %[[V16_ZERO_F16]] + %exp_v16_f16 = llvm.call @_Z22__spirv_ocl_native_expDv16_f16(%v16_c0_f16) : (vector<16xf16>) -> vector<16xf16> + + // SPIRV backend does not currently handle fastmath flags: The SPIRV + // backend would need to generate OpDecorate calls to decorate math ops + // with FPFastMathMode/FPFastMathModeINTEL decorations. + // + // FIXME: When support for fastmath flags in the SPIRV backend is added, + // add tests here to ensure fastmath flags are converted to the correct + // OpDecorate calls. + // + // See: + // - https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_math_extended_instructions + // - https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpDecorate + + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_cos %[[ZERO_F16]] + %cos_afn_f16 = llvm.call @_Z22__spirv_ocl_native_cosDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp2 %[[ZERO_F32]] + %exp2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_exp2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_log %[[ZERO_F16]] + %log_afn_f16 = llvm.call @_Z22__spirv_ocl_native_logDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_log2 %[[ZERO_F32]] + %log2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_log2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_log10 %[[V8_ZERO_F64]] + %log10_afn_f64 = llvm.call @_Z24__spirv_ocl_native_log10Dv8_d(%v8_c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64> + // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_powr %[[V16_ZERO_F16]] %[[V16_ZERO_F16]] + %powr_afn_f16 = llvm.call @_Z23__spirv_ocl_native_powrDv16_DhS_(%v16_c0_f16, %v16_c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf16>, vector<16xf16>) -> vector<16xf16> + // CHECK: OpExtInst %[[F64T]] %{{.+}} native_rsqrt %[[ZERO_F64]] + %rsqrt_afn_f64 = llvm.call @_Z24__spirv_ocl_native_rsqrtd(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_sin %[[ZERO_F16]] + %sin_afn_f16 = llvm.call @_Z22__spirv_ocl_native_sinDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_sqrt %[[ZERO_F32]] + %sqrt_afn_f32 = llvm.call @_Z23__spirv_ocl_native_sqrtf(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + // CHECK: OpExtInst %[[F64T]] %{{.+}} native_tan %[[ZERO_F64]] + %tan_afn_f64 = llvm.call @_Z22__spirv_ocl_native_tand(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_divide %[[ZERO_F32]] %[[ZERO_F32]] + %divide_afn_f32 = llvm.call @_Z25__spirv_ocl_native_divideff(%c0_f32, %c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32 + + llvm.return + } + + llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16 + llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32 + llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64 + llvm.func 
@_Z22__spirv_ocl_native_expDv2_f64(vector<2xf64>) -> vector<2xf64> + llvm.func @_Z22__spirv_ocl_native_expDv3_f32(vector<3xf32>) -> vector<3xf32> + llvm.func @_Z22__spirv_ocl_native_expDv4_f64(vector<4xf64>) -> vector<4xf64> + llvm.func @_Z22__spirv_ocl_native_expDv8_f64(vector<8xf64>) -> vector<8xf64> + llvm.func @_Z22__spirv_ocl_native_expDv16_f16(vector<16xf16>) -> vector<16xf16> + llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16 + llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32 + llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16 + llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32 + llvm.func @_Z24__spirv_ocl_native_log10Dv8_d(vector<8xf64>) -> vector<8xf64> + llvm.func @_Z23__spirv_ocl_native_powrDv16_DhS_(vector<16xf16>, vector<16xf16>) -> vector<16xf16> + llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64 + llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16 + llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32 + llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64 + llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32 + } +} diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index a7a73ae..780c25a 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1538,6 +1538,92 @@ func.func @unsupportedRescaleInexactRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8> // ----- +// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()> +// CHECK-LABEL: @rescale_no_const +// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]] +func.func @rescale_no_const(%arg0 : tensor<2xi8>, %multiplier : tensor<1xi32>, %shift : tensor<1xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) { + // CHECK: [[MULTIPLIER:%.+]] = tensor.collapse_shape %arg1 [] : tensor<1xi32> into tensor<i32> + // CHECK: [[SHIFT:%.+]] = tensor.collapse_shape %arg2 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8> + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[MULTIPLIER]], [[SHIFT]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<i32>, tensor<i8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) { + // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8): + // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32 + // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32 + // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32 + // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32 + // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32 + // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32 + // CHECK: %c-128_i32 = arith.constant -128 : i32 + // CHECK: %c127_i32 = arith.constant 127 : i32 + // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32 + // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32 + %0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = false, input_unsigned = 
false, output_unsigned = false} : (tensor<2xi8>, tensor<1xi32>, tensor<1xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8> + return %0 : tensor<2xi8> +} + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()> +// CHECK-LABEL: @rescale_no_const_per_channel +// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]] +func.func @rescale_no_const_per_channel(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) { + // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8> + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) { + // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8): + // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32 + // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32 + // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32 + // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32 + // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32 + // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32 + // CHECK: %c-128_i32 = arith.constant -128 : i32 + // CHECK: %c127_i32 = arith.constant 127 : i32 + // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32 + // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32 + %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8> + return %0 : tensor<2xi8> +} + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()> +// CHECK-LABEL: @rescale_no_const_per_channel_input_output_zp_ui8 +// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]] +func.func @rescale_no_const_per_channel_input_output_zp_ui8(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xui8>, %output_zp : tensor<1xui8>) -> (tensor<2xui8>) { + // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xui8> into tensor<ui8> + // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xui8> into tensor<ui8> + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xui8> + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<ui8>, tensor<ui8>) outs([[INIT]] : tensor<2xui8>) { + // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: ui8, [[ARG4:%.*]]: ui8, [[OUT:%.*]]: ui8): + // CHECK: 
[[INPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG3]] : ui8 to i8 + // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extui [[INPUT_ZP_I8]] : i8 to i32 + // CHECK: [[OUTPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG4]] : ui8 to i8 + // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extui [[OUTPUT_ZP_I8]] : i8 to i32 + // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32 + // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32 + // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32 + // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32 + // CHECK: %c0_i32 = arith.constant 0 : i32 + // CHECK: %c255_i32 = arith.constant 255 : i32 + // CHECK: [[MAX:%.+]] = arith.maxsi %c0_i32, [[TMP3]] : i32 + // CHECK: [[MIN:%.+]] = arith.minsi %c255_i32, [[MAX]] : i32 + %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = true} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xui8>, tensor<1xui8>) -> tensor<2xui8> + return %0 : tensor<2xui8> +} + +// ----- + // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: @reverse diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir index c58b153..21b508e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -65,13 +65,13 @@ func.func @main(%t: tensor<?xf32>, %sz: index, %idx: index) -> (f32, f32) { // ----- -func.func @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> { +func.func private @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> { func.return %A : tensor<?xf32> } -// CHECK-LABEL: func @return_arg +// CHECK-LABEL: func private @return_arg // CHECK-SAME: %[[A:.*]]: memref<?xf32 // CHECK-NOT: return %[[A]] -// NO-DROP-LABEL: func @return_arg +// NO-DROP-LABEL: func private @return_arg // NO-DROP-SAME: %[[A:.*]]: memref<?xf32 // NO-DROP: return %[[A]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 6054a61..d5f834b 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -171,9 +171,9 @@ func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // Bufferization of a function that is reading and writing. %t0 is writable, so // no copy should be inserted. 
-// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { +func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { // CHECK-NOT: copy %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index @@ -186,9 +186,9 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { return %0, %1 : tensor<?xf32>, f32 } -// CHECK-LABEL: func @call_func_with_non_tensor_return( +// CHECK-LABEL: func private @call_func_with_non_tensor_return( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @call_func_with_non_tensor_return( +func.func private @call_func_with_non_tensor_return( %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) { // CHECK-NOT: alloc // CHECK-NOT: copy @@ -203,9 +203,9 @@ func.func @call_func_with_non_tensor_return( // Bufferization of a function that is reading and writing. %t0 is not writable, // so a copy is needed. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { +func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { // CHECK-NOT: copy %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index @@ -276,10 +276,10 @@ func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) { // This function does not read, just write. We need an alloc, but no copy. -// CHECK-LABEL: func @does_not_read( +// CHECK-LABEL: func private @does_not_read( // CHECK-NOT: alloc // CHECK-NOT: copy -func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> { +func.func private @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> { %f0 = arith.constant 0.0 : f32 %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32> return %r : tensor<?xf32> @@ -354,9 +354,9 @@ func.func @main() { // A write inside an scf.execute_region. An equivalent tensor is yielded. 
-// CHECK-LABEL: func @execute_region_test( +// CHECK-LABEL: func private @execute_region_test( // CHECK-SAME: %[[m1:.*]]: memref<?xf32 -func.func @execute_region_test(%t1 : tensor<?xf32>) +func.func private @execute_region_test(%t1 : tensor<?xf32>) -> (f32, tensor<?xf32>, f32) { %f1 = arith.constant 0.0 : f32 @@ -397,11 +397,11 @@ func.func @no_inline_execute_region_not_canonicalized() { // CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>) func.func private @some_external_func(tensor<?xf32>) -// CHECK: func @scf_for_with_tensor_insert_slice( +// CHECK: func private @scf_for_with_tensor_insert_slice( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @scf_for_with_tensor_insert_slice( +func.func private @scf_for_with_tensor_insert_slice( %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>, %lb : index, %ub : index, %step : index) -> (tensor<?xf32>, tensor<?xf32>) @@ -456,11 +456,11 @@ func.func @bar( // ----- -// CHECK: func @init_and_dot( +// CHECK: func private @init_and_dot( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>> -func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> { +func.func private @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> { // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32 %v0 = arith.constant 0.0 : f32 @@ -574,9 +574,9 @@ func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i // No alloc or copy inside of the loop. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> { +func.func private @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> { %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index // CHECK: memref.store %{{.*}}, %[[arg0]] diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 9616a3e..1df15e8 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -10,10 +10,10 @@ // TODO: Some test cases from this file should be moved to other dialects. 
-// CHECK-LABEL: func @fill_inplace( +// CHECK-LABEL: func private @fill_inplace( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> -// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) { -func.func @fill_inplace( +// CHECK-NO-LAYOUT-MAP-LABEL: func private @fill_inplace(%{{.*}}: memref<?xf32>) { +func.func private @fill_inplace( %A : tensor<?xf32> {bufferization.writable = true}) -> tensor<?xf32> { @@ -56,10 +56,10 @@ func.func @not_inplace( // ----- -// CHECK-LABEL: func @not_inplace +// CHECK-LABEL: func private @not_inplace // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, strided<[?, ?], offset: ?>>) { -// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) { -func.func @not_inplace( +// CHECK-NO-LAYOUT-MAP-LABEL: func private @not_inplace(%{{.*}}: memref<?x?xf32>) { +func.func private @not_inplace( %A : tensor<?x?xf32> {bufferization.writable = true}) -> tensor<?x?xf32> { @@ -235,7 +235,7 @@ func.func @dominance_violation_bug_1( // ----- -func.func @gather_like( +func.func private @gather_like( %arg0 : tensor<?x?xf32> {bufferization.writable = false}, %arg1 : tensor<?xi32> {bufferization.writable = false}, %arg2 : tensor<?x?xf32> {bufferization.writable = true}) @@ -254,7 +254,7 @@ func.func @gather_like( } -> tensor<?x?xf32> return %0 : tensor<?x?xf32> } -// CHECK-LABEL: func @gather_like( +// CHECK-LABEL: func private @gather_like( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32, // CHECK-SAME: %[[ARG1:.+]]: memref<?xi32 // CHECK-SAME: %[[ARG2:.+]]: memref<?x?xf32 diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir new file mode 100644 index 0000000..35355c6 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir @@ -0,0 +1,102 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s + +// CHECK: acc.firstprivate.recipe @firstprivate_scalar : memref<f32> init { +// CHECK: ^bb0(%{{.*}}: memref<f32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32> +// CHECK: acc.yield %[[ALLOC]] : memref<f32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<f32>, %[[DST:.*]]: memref<f32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<f32> to memref<f32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar() { + %0 = memref.alloca() {test.var = "scalar"} : memref<f32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_static_2d : memref<10x20xf32> init { +// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<10x20xf32>, %[[DST:.*]]: memref<10x20xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x20xf32> to memref<10x20xf32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_static_2d() { + %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_dynamic_2d : memref<?x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>): +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32> +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32> +// CHECK: 
%[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<?x?xf32>, %[[DST:.*]]: memref<?x?xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<?x?xf32> to memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_dynamic_2d(%arg0: index, %arg1: index) { + %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_mixed_dims : memref<10x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>): +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<10x?xf32>, %[[DST:.*]]: memref<10x?xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x?xf32> to memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_mixed_dims(%arg0: index) { + %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_scalar_int : memref<i32> init { +// CHECK: ^bb0(%{{.*}}: memref<i32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32> +// CHECK: acc.yield %[[ALLOC]] : memref<i32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<i32>, %[[DST:.*]]: memref<i32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<i32> to memref<i32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar_int() { + %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32> + return +} + diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir new file mode 100644 index 0000000..8403ee8 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s + +// CHECK: acc.private.recipe @private_scalar : memref<f32> init { +// CHECK: ^bb0(%{{.*}}: memref<f32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32> +// CHECK: acc.yield %[[ALLOC]] : memref<f32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar() { + %0 = memref.alloca() {test.var = "scalar"} : memref<f32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_static_2d : memref<10x20xf32> init { +// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_static_2d() { + %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_dynamic_2d : memref<?x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>): +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32> +// 
CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32> +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_dynamic_2d(%arg0: index, %arg1: index) { + %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_mixed_dims : memref<10x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>): +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32> +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_mixed_dims(%arg0: index) { + %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_scalar_int : memref<i32> init { +// CHECK: ^bb0(%{{.*}}: memref<i32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32> +// CHECK: acc.yield %[[ALLOC]] : memref<i32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar_int() { + %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32> + return +} + diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index a1067ec..af09dc8 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -8,11 +8,11 @@ // Test bufferization using memref types that have no layout map. 
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null -// CHECK-LABEL: func @scf_for_yield_only( +// CHECK-LABEL: func private @scf_for_yield_only( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: ) -> memref<?xf32> { -func.func @scf_for_yield_only( +func.func private @scf_for_yield_only( %A : tensor<?xf32> {bufferization.writable = false}, %B : tensor<?xf32> {bufferization.writable = true}, %lb : index, %ub : index, %step : index) @@ -85,11 +85,11 @@ func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true}, // ----- -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +// CHECK-LABEL: func private @scf_for_with_tensor.insert_slice // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @scf_for_with_tensor.insert_slice( +func.func private @scf_for_with_tensor.insert_slice( %A : tensor<?xf32> {bufferization.writable = false}, %B : tensor<?xf32> {bufferization.writable = true}, %C : tensor<4xf32> {bufferization.writable = false}, @@ -471,11 +471,11 @@ func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>, // ----- -// CHECK-LABEL: func.func @parallel_insert_slice_no_conflict( +// CHECK-LABEL: func private @parallel_insert_slice_no_conflict( // CHECK-SAME: %[[idx:.*]]: index, %[[idx2:.*]]: index, // CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>, // CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}> -func.func @parallel_insert_slice_no_conflict( +func.func private @parallel_insert_slice_no_conflict( %idx: index, %idx2: index, %arg1: tensor<?xf32> {bufferization.writable = true}, diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir index 5f95da2..b6c72be 100644 --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -8,12 +8,12 @@ // Test bufferization using memref types that have no layout map. 
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null -// CHECK-LABEL: func @insert_slice_fun +// CHECK-LABEL: func private @insert_slice_fun // CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @insert_slice_fun( +func.func private @insert_slice_fun( %A0 : tensor<?xf32> {bufferization.writable = false}, %A1 : tensor<?xf32> {bufferization.writable = true}, %t0 : tensor<4xf32> {bufferization.writable = false}, @@ -331,12 +331,12 @@ func.func @dim_not_reading(%t: tensor<?xf32>, %f: f32, %pos: index) // ----- // CHECK: #[[$map:.*]] = affine_map<(d0) -> (d0 + 5)> -// CHECK-LABEL: func.func @cast_retains_buffer_layout( +// CHECK-LABEL: func.func private @cast_retains_buffer_layout( // CHECK-SAME: %[[t:.*]]: memref<?xf32, #[[$map]]>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> { // CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, #[[$map]]> to memref<10xf32, #[[$map]]> // CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, #[[$map]]> to memref<?xf32, strided<[1], offset: 7>> // CHECK: return %[[slice]] -func.func @cast_retains_buffer_layout( +func.func private @cast_retains_buffer_layout( %t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0 + 5)>}, %sz: index) @@ -353,12 +353,12 @@ func.func @cast_retains_buffer_layout( // ----- -// CHECK-LABEL: func.func @cast_retains_buffer_layout_strided( +// CHECK-LABEL: func private @cast_retains_buffer_layout_strided( // CHECK-SAME: %[[t:.*]]: memref<?xf32, strided<[1], offset: 5>>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> { // CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, strided<[1], offset: 5>> to memref<10xf32, strided<[1], offset: 5>> // CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 7>> // CHECK: return %[[slice]] -func.func @cast_retains_buffer_layout_strided( +func.func private @cast_retains_buffer_layout_strided( %t: tensor<?xf32> {bufferization.buffer_layout = strided<[1], offset: 5>}, %sz: index) diff --git a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir index cb16c37..b3154d4 100644 --- a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir +++ b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir @@ -3,7 +3,16 @@ // RUN: | FileCheck %s // CHECK: data = -// CHECK-RAW: [[[7.7, 0, 0], [7.7, 0, 0], [7.7, 0, 0]], [[0, 7.7, 0], [0, 7.7, 0], [0, 7.7, 0]], [[0, 0, 7.7], [0, 0, 7.7], [0, 0, 7.7]]] +// CHECK{LITERAL}: [[[7.7, 0, 0], +// CHECK{LITERAL}: [7.7, 0, 0], +// CHECK{LITERAL}: [7.7, 0, 0]], +// CHECK{LITERAL}: [[0, 7.7, 0], +// CHECK{LITERAL}: [0, 7.7, 0], +// CHECK{LITERAL}: [0, 7.7, 0]], +// CHECK{LITERAL}: [[0, 0, 7.7], +// CHECK{LITERAL}: [0, 0, 7.7], +// CHECK{LITERAL}: [0, 0, 7.7]]] + module attributes { gpu.container_module, spirv.target_env = #spirv.target_env< diff --git a/mlir/test/Pass/remark-final.mlir b/mlir/test/Pass/remark-final.mlir new file mode 100644 index 0000000..325271e --- /dev/null +++ b/mlir/test/Pass/remark-final.mlir @@ -0,0 +1,17 @@ +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final 
2>&1 | FileCheck %s +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final --remark-format=yaml --remarks-output-file=%t.yaml +// RUN: FileCheck --check-prefix=CHECK-YAML %s < %t.yaml +module @foo { + "test.op"() : () -> () + +} + +// CHECK-YAML-NOT: This is a test passed remark (should be dropped) +// CHECK-YAML-DAG: !Analysis +// CHECK-YAML-DAG: !Failure +// CHECK-YAML-DAG: !Passed + +// CHECK-NOT: This is a test passed remark (should be dropped) +// CHECK-DAG: remark: [Analysis] test-remark +// CHECK-DAG: remark: [Failure] test-remark | Category:category-2-failed +// CHECK-DAG: remark: [Passed] test-remark | Category:category-1-passed diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt index 9187998..c37671a 100644 --- a/mlir/test/lib/Analysis/CMakeLists.txt +++ b/mlir/test/lib/Analysis/CMakeLists.txt @@ -17,6 +17,7 @@ add_mlir_library(MLIRTestAnalysis DataFlow/TestDenseForwardDataFlowAnalysis.cpp DataFlow/TestLivenessAnalysis.cpp DataFlow/TestSparseBackwardDataFlowAnalysis.cpp + DataFlow/TestStridedMetadataRangeAnalysis.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp new file mode 100644 index 0000000..6ac09fd --- /dev/null +++ b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp @@ -0,0 +1,86 @@ +//===- TestStridedMetadataRangeAnalysis.cpp - Test strided md analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" +#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" +#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h" +#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h" +#include "mlir/Analysis/DataFlowFramework.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::dataflow; + +static void printAnalysisResults(DataFlowSolver &solver, Operation *op, + raw_ostream &os) { + // Collect the strided metadata of the op results. + SmallVector<std::pair<unsigned, const StridedMetadataRangeLattice *>> results; + for (OpResult result : op->getResults()) { + const auto *state = solver.lookupState<StridedMetadataRangeLattice>(result); + // Skip the result if it's uninitialized. + if (!state || state->getValue().isUninitialized()) + continue; + + // Skip the result if the range is empty. + const mlir::StridedMetadataRange &md = state->getValue(); + if (md.getOffsets().empty() && md.getSizes().empty() && + md.getStrides().empty()) + continue; + results.push_back({result.getResultNumber(), state}); + } + + // Early exit if there's no metadata to print. + if (results.empty()) + return; + + // Print the metadata. 
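+  // The output takes the form the strided-metadata tests expect, e.g.:
+  //   Op: %subview = memref.subview ...
+  //     result[0]: strided_metadata<offset = [...], sizes = [...], strides = [...]>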
+  os << "Op: " << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n"; +  for (auto [idx, state] : results) +    os << "  result[" << idx << "]: " << state->getValue() << "\n"; +  os << "\n"; +} + +namespace { +struct TestStridedMetadataRangeAnalysisPass +    : public PassWrapper<TestStridedMetadataRangeAnalysisPass, +                         OperationPass<>> { +  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( +      TestStridedMetadataRangeAnalysisPass) + +  StringRef getArgument() const override { +    return "test-strided-metadata-range-analysis"; +  } +  void runOnOperation() override { +    Operation *op = getOperation(); + +    DataFlowSolver solver; +    solver.load<DeadCodeAnalysis>(); +    solver.load<SparseConstantPropagation>(); +    solver.load<IntegerRangeAnalysis>(); +    solver.load<StridedMetadataRangeAnalysis>(); +    if (failed(solver.initializeAndRun(op))) +      return signalPassFailure(); + +    op->walk( +        [&](Operation *op) { printAnalysisResults(solver, op, llvm::errs()); }); +  } +}; +} // end anonymous namespace + +namespace mlir { +namespace test { +void registerTestStridedMetadataRangeAnalysisPass() { +  PassRegistration<TestStridedMetadataRangeAnalysisPass>(); +} +} // end namespace test +} // end namespace mlir diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt index f84055d..1e59338 100644 --- a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_library(MLIROpenACCTestPasses TestOpenACC.cpp TestPointerLikeTypeInterface.cpp +  TestRecipePopulate.cpp EXCLUDE_FROM_LIBMLIR ) diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp index 9886240..bea21b9 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp @@ -15,9 +15,13 @@ namespace test { // Forward declarations of individual test pass registration functions void registerTestPointerLikeTypeInterfacePass(); +void registerTestRecipePopulatePass(); // Unified registration function for all OpenACC tests -void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); } +void registerTestOpenACC() { +  registerTestPointerLikeTypeInterfacePass(); +  registerTestRecipePopulatePass(); +} } // namespace test } // namespace mlir diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp index 85f9283..027b0a1 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp @@ -196,13 +196,15 @@ void TestPointerLikeTypeInterfacePass::testGenAllocate( newBuilder.setInsertionPointAfter(op); // Call the genAllocate API +  bool needsFree = false; Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc", -                                           result.getType(), result); +                                           result.getType(), result, needsFree); if (allocRes) { llvm::errs() << "Successfully generated alloc for operation: "; op->print(llvm::errs()); llvm::errs() << "\n"; +    llvm::errs() << "\tneeds free: " << (needsFree ?
"true" : "false") << "\n"; // Print all operations that were inserted for (Operation *insertedOp : tracker.insertedOps) { @@ -230,8 +232,8 @@ void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result, // Call the genFree API auto typedResult = cast<TypedValue<PointerLikeType>>(result); -  bool success = -      pointerType.genFree(newBuilder, loc, typedResult, result.getType()); +  bool success = pointerType.genFree(newBuilder, loc, typedResult, result, +                                     result.getType()); if (success) { llvm::errs() << "Successfully generated free for operation: "; diff --git a/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp new file mode 100644 index 0000000..35f092c --- /dev/null +++ b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp @@ -0,0 +1,110 @@ +//===- TestRecipePopulate.cpp - Test Recipe Population -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains test passes for testing the createAndPopulate methods +// of the recipe operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/CommandLine.h" + +using namespace mlir; +using namespace mlir::acc; + +namespace { + +struct TestRecipePopulatePass +    : public PassWrapper<TestRecipePopulatePass, OperationPass<ModuleOp>> { +  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRecipePopulatePass) + +  TestRecipePopulatePass() = default; +  TestRecipePopulatePass(const TestRecipePopulatePass &pass) +      : PassWrapper(pass) { +    recipeType = pass.recipeType; +  } + +  Pass::Option<std::string> recipeType{ +      *this, "recipe-type", +      llvm::cl::desc("Recipe type: private or firstprivate"), +      llvm::cl::init("private")}; + +  StringRef getArgument() const override { return "test-acc-recipe-populate"; } + +  StringRef getDescription() const override { +    return "Test OpenACC recipe population"; +  } + +  void runOnOperation() override; + +  void getDependentDialects(DialectRegistry &registry) const override { +    registry.insert<acc::OpenACCDialect>(); +    registry.insert<arith::ArithDialect>(); +    registry.insert<memref::MemRefDialect>(); +  } +}; + +void TestRecipePopulatePass::runOnOperation() { +  auto module = getOperation(); +  OpBuilder builder(&getContext()); + +  // Collect all test variables +  SmallVector<std::tuple<Operation *, Value, std::string>> testVars; + +  module.walk([&](Operation *op) { +    if (auto varName = op->getAttrOfType<StringAttr>("test.var")) { +      for (auto result : op->getResults()) { +        testVars.push_back({op, result, varName.str()}); +      } +    } +  }); + +  // Generate recipes at module level +  builder.setInsertionPoint(&module.getBodyRegion().front(), +                            module.getBodyRegion().front().begin()); + +  for (auto [op, var, varName] : testVars) { +    Location loc = op->getLoc(); + +    std::string recipeName = recipeType.getValue() + "_" + varName; +    ValueRange bounds; // No bounds for memref tests + +    if (recipeType == "private") { +      auto recipe = PrivateRecipeOp::createAndPopulate( +          builder, loc, recipeName, var.getType(),
varName, bounds); + + if (!recipe) { + op->emitError("Failed to create private recipe for ") << varName; + } + } else if (recipeType == "firstprivate") { + auto recipe = FirstprivateRecipeOp::createAndPopulate( + builder, loc, recipeName, var.getType(), varName, bounds); + + if (!recipe) { + op->emitError("Failed to create firstprivate recipe for ") << varName; + } + } + } +} + +} // namespace + +namespace mlir { +namespace test { + +void registerTestRecipePopulatePass() { + PassRegistration<TestRecipePopulatePass>(); +} + +} // namespace test +} // namespace mlir diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp index 3b25686..5ca2d1a 100644 --- a/mlir/test/lib/Pass/TestRemarksPass.cpp +++ b/mlir/test/lib/Pass/TestRemarksPass.cpp @@ -43,7 +43,12 @@ public: << remark::add("This is a test missed remark") << remark::reason("because we are testing the remark pipeline") << remark::suggest("try using the remark pipeline feature"); - + mlir::remark::passed( + loc, + remark::RemarkOpts::name("test-remark").category("category-1-passed")) + << remark::add("This is a test passed remark (should be dropped)") + << remark::reason("because we are testing the remark pipeline") + << remark::suggest("try using the remark pipeline feature"); mlir::remark::passed( loc, remark::RemarkOpts::name("test-remark").category("category-1-passed")) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 6432fae..8842180 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -151,6 +151,7 @@ void registerTestSliceAnalysisPass(); void registerTestSPIRVCPURunnerPipeline(); void registerTestSPIRVFuncSignatureConversion(); void registerTestSPIRVVectorUnrolling(); +void registerTestStridedMetadataRangeAnalysisPass(); void registerTestTensorCopyInsertionPass(); void registerTestTensorLikeAndBufferLikePass(); void registerTestTensorTransforms(); @@ -299,6 +300,7 @@ void registerTestPasses() { mlir::test::registerTestSPIRVCPURunnerPipeline(); mlir::test::registerTestSPIRVFuncSignatureConversion(); mlir::test::registerTestSPIRVVectorUnrolling(); + mlir::test::registerTestStridedMetadataRangeAnalysisPass(); mlir::test::registerTestTensorCopyInsertionPass(); mlir::test::registerTestTensorLikeAndBufferLikePass(); mlir::test::registerTestTensorTransforms(); diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp index 11bf9ce..d55ad482 100644 --- a/mlir/tools/mlir-tblgen/EnumsGen.cpp +++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/CodeGenHelpers.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" @@ -702,7 +701,11 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) { StringRef underlyingToSymFnName = enumInfo.getUnderlyingToSymbolFnName(); auto enumerants = enumInfo.getAllCases(); - llvm::NamespaceEmitter ns(os, cppNamespace); + SmallVector<StringRef, 2> namespaces; + llvm::SplitString(cppNamespace, namespaces, "::"); + + for (auto ns : namespaces) + os << "namespace " << ns << " {\n"; // Emit the enum class definition emitEnumClass(enumDef, enumName, underlyingType, description, enumerants, os); @@ -763,7 +766,8 @@ public: os << formatv(attrClassDecl, enumName, attrClassName, baseAttrClassName); } - ns.close(); + for (auto ns : llvm::reverse(namespaces)) + os << "} // 
namespace " << ns << "\n"; // Generate a generic parser and printer for the enum. std::string qualName = @@ -786,8 +790,13 @@ static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) { static void emitEnumDef(const Record &enumDef, raw_ostream &os) { EnumInfo enumInfo(enumDef); + StringRef cppNamespace = enumInfo.getCppNamespace(); - llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace()); + SmallVector<StringRef, 2> namespaces; + llvm::SplitString(cppNamespace, namespaces, "::"); + + for (auto ns : namespaces) + os << "namespace " << ns << " {\n"; if (enumInfo.isBitEnum()) { emitSymToStrFnForBitEnum(enumDef, os); @@ -801,6 +810,10 @@ static void emitEnumDef(const Record &enumDef, raw_ostream &os) { if (enumInfo.genSpecializedAttr()) emitSpecializedAttrDef(enumDef, os); + + for (auto ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; + os << "\n"; } static bool emitEnumDefs(const RecordKeeper &records, raw_ostream &os) { diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp index ab8d534..730b5b2 100644 --- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp +++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/CodeGenHelpers.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" @@ -343,7 +342,11 @@ void InterfaceGenerator::emitModelDecl(const Interface &interface) { } void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) { - llvm::NamespaceEmitter ns(os, interface.getCppNamespace()); + llvm::SmallVector<StringRef, 2> namespaces; + llvm::SplitString(interface.getCppNamespace(), namespaces, "::"); + for (StringRef ns : namespaces) + os << "namespace " << ns << " {\n"; + for (auto &method : interface.getMethods()) { os << "template<typename " << valueTemplate << ">\n"; emitCPPType(method.getReturnType(), os); @@ -439,11 +442,18 @@ void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) { method.isStatic() ? 
&ctx : &nonStaticMethodFmt); os << "\n}\n"; } + + for (StringRef ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) { - auto cppNamespace = (interface.getCppNamespace() + "::detail").str(); - llvm::NamespaceEmitter ns(os, cppNamespace); + llvm::SmallVector<StringRef, 2> namespaces; + llvm::SplitString(interface.getCppNamespace(), namespaces, "::"); + for (StringRef ns : namespaces) + os << "namespace " << ns << " {\n"; + + os << "namespace detail {\n"; StringRef interfaceName = interface.getName(); auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str(); @@ -494,6 +504,10 @@ void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) { os << tblgen::tgfmt(*extraTraitDecls, &traitMethodFmt) << "\n"; os << " };\n"; + os << "}// namespace detail\n"; + + for (StringRef ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } static void emitInterfaceDeclMethods(const Interface &interface, @@ -519,7 +533,10 @@ static void emitInterfaceDeclMethods(const Interface &interface, } void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) { - llvm::NamespaceEmitter ns(os, interface.getCppNamespace()); + llvm::SmallVector<StringRef, 2> namespaces; + llvm::SplitString(interface.getCppNamespace(), namespaces, "::"); + for (StringRef ns : namespaces) + os << "namespace " << ns << " {\n"; // Emit a forward declaration of the interface class so that it becomes usable // in the signature of its methods. @@ -528,10 +545,16 @@ void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) { StringRef interfaceName = interface.getName(); os << "class " << interfaceName << ";\n"; + + for (StringRef ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) { - llvm::NamespaceEmitter ns(os, interface.getCppNamespace()); + llvm::SmallVector<StringRef, 2> namespaces; + llvm::SplitString(interface.getCppNamespace(), namespaces, "::"); + for (StringRef ns : namespaces) + os << "namespace " << ns << " {\n"; StringRef interfaceName = interface.getName(); auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str(); @@ -608,6 +631,9 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) { } os << "};\n"; + + for (StringRef ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } bool InterfaceGenerator::emitInterfaceDecls() { diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index ca291b5..3ead2f0 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -259,8 +259,8 @@ static void emitInterfaceDecl(const Availability &availability, std::string interfaceTraitsName = std::string(formatv("{0}Traits", interfaceName)); - llvm::NamespaceEmitter nsEmitter(os, - availability.getInterfaceClassNamespace()); + StringRef cppNamespace = availability.getInterfaceClassNamespace(); + llvm::NamespaceEmitter nsEmitter(os, cppNamespace); os << "class " << interfaceName << ";\n\n"; // Emit the traits struct containing the concept and model declarations. 
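The tblgen changes above (in EnumsGen.cpp, OpInterfacesGen.cpp, and SPIRVUtilsGen.cpp) all replace `llvm::NamespaceEmitter` with the same hand-rolled pattern: split the `::`-qualified namespace string, open one `namespace X {` per component, emit the body, then close the braces in reverse order. A minimal self-contained sketch of that pattern follows; the `emitInNamespaces` helper is illustrative only and not part of this patch.

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative helper: emit `body` wrapped in the namespaces named by a
// "::"-separated string such as "mlir::acc".
static void emitInNamespaces(llvm::raw_ostream &os,
                             llvm::StringRef cppNamespace,
                             llvm::function_ref<void()> body) {
  llvm::SmallVector<llvm::StringRef, 2> namespaces;
  // SplitString treats each character of "::" as a delimiter, so
  // "mlir::acc" becomes {"mlir", "acc"}.
  llvm::SplitString(cppNamespace, namespaces, "::");
  for (llvm::StringRef ns : namespaces)
    os << "namespace " << ns << " {\n";
  body();
  // Close in reverse order so the trailing comments mirror the nesting.
  for (llvm::StringRef ns : llvm::reverse(namespaces))
    os << "} // namespace " << ns << "\n";
}
```

For example, `emitInNamespaces(os, "mlir::acc", [&] { os << "class Foo;\n"; })` prints the forward declaration wrapped in `namespace mlir { namespace acc { ... } }`.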
@@ -418,9 +418,15 @@ static void emitAvailabilityQueryForBitEnum(const Record &enumDef, static void emitEnumDecl(const Record &enumDef, raw_ostream &os) { EnumInfo enumInfo(enumDef); StringRef enumName = enumInfo.getEnumClassName(); + StringRef cppNamespace = enumInfo.getCppNamespace(); auto enumerants = enumInfo.getAllCases(); - llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace()); + llvm::SmallVector<StringRef, 2> namespaces; + llvm::SplitString(cppNamespace, namespaces, "::"); + + for (auto ns : namespaces) + os << "namespace " << ns << " {\n"; + llvm::StringSet<> handledClasses; // Place all availability specifications to their corresponding @@ -435,6 +441,9 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) { enumName); handledClasses.insert(className); } + + for (auto ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) { @@ -450,19 +459,31 @@ static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) { static void emitEnumDef(const Record &enumDef, raw_ostream &os) { EnumInfo enumInfo(enumDef); - llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace()); + StringRef cppNamespace = enumInfo.getCppNamespace(); + + llvm::SmallVector<StringRef, 2> namespaces; + llvm::SplitString(cppNamespace, namespaces, "::"); + + for (auto ns : namespaces) + os << "namespace " << ns << " {\n"; - if (enumInfo.isBitEnum()) + if (enumInfo.isBitEnum()) { emitAvailabilityQueryForBitEnum(enumDef, os); - else + } else { emitAvailabilityQueryForIntEnum(enumDef, os); + } + + for (auto ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; + os << "\n"; } static bool emitEnumDefs(const RecordKeeper &records, raw_ostream &os) { llvm::emitSourceFileHeader("SPIR-V Enum Availability Definitions", os, records); - for (const Record *def : records.getAllDerivedDefinitions("EnumInfo")) + auto defs = records.getAllDerivedDefinitions("EnumInfo"); + for (const auto *def : defs) emitEnumDef(*def, os); return false; diff --git a/mlir/unittests/IR/RemarkTest.cpp b/mlir/unittests/IR/RemarkTest.cpp index bcbda90..09c576c 100644 --- a/mlir/unittests/IR/RemarkTest.cpp +++ b/mlir/unittests/IR/RemarkTest.cpp @@ -53,10 +53,12 @@ TEST(Remark, TestOutputOptimizationRemark) { /*missed=*/categoryUnroll, /*analysis=*/categoryRegister, /*failed=*/categoryInliner}; - + std::unique_ptr<remark::RemarkEmittingPolicyAll> policy = + std::make_unique<remark::RemarkEmittingPolicyAll>(); LogicalResult isEnabled = mlir::remark::enableOptimizationRemarksWithLLVMStreamer( - context, yamlFile, llvm::remarks::Format::YAML, cats); + context, yamlFile, llvm::remarks::Format::YAML, std::move(policy), + cats); ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; // PASS: something succeeded @@ -202,9 +204,10 @@ TEST(Remark, TestOutputOptimizationRemarkDiagnostic) { /*missed=*/categoryUnroll, /*analysis=*/categoryRegister, /*failed=*/categoryUnroll}; - - LogicalResult isEnabled = - remark::enableOptimizationRemarks(context, nullptr, cats, true); + std::unique_ptr<remark::RemarkEmittingPolicyAll> policy = + std::make_unique<remark::RemarkEmittingPolicyAll>(); + LogicalResult isEnabled = remark::enableOptimizationRemarks( + context, nullptr, std::move(policy), cats, true); ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; @@ -282,8 +285,11 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) { /*analysis=*/std::nullopt, /*failed=*/categoryLoopunroll}; + 
std::unique_ptr<remark::RemarkEmittingPolicyAll> policy = + std::make_unique<remark::RemarkEmittingPolicyAll>(); LogicalResult isEnabled = remark::enableOptimizationRemarks( - context, std::make_unique<MyCustomStreamer>(), cats, true); + context, std::make_unique<MyCustomStreamer>(), std::move(policy), cats, + true); ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; // Remark 1: pass, category LoopUnroll @@ -311,4 +317,66 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) { EXPECT_NE(errOut.find(pass2Msg), std::string::npos); // printed EXPECT_EQ(errOut.find(pass3Msg), std::string::npos); // filtered out } + +TEST(Remark, TestRemarkFinal) { + testing::internal::CaptureStderr(); + const auto *pass1Msg = "I failed"; + const auto *pass2Msg = "I failed too"; + const auto *pass3Msg = "I succeeded"; + const auto *pass4Msg = "I succeeded too"; + + std::string categoryLoopunroll("LoopUnroll"); + + std::string seenMsg = ""; + + { + MLIRContext context; + Location loc = FileLineColLoc::get(&context, "test.cpp", 1, 5); + Location locOther = FileLineColLoc::get(&context, "test.cpp", 55, 5); + + // Setup the remark engine + mlir::remark::RemarkCategories cats{/*all=*/"", + /*passed=*/categoryLoopunroll, + /*missed=*/categoryLoopunroll, + /*analysis=*/categoryLoopunroll, + /*failed=*/categoryLoopunroll}; + + std::unique_ptr<remark::RemarkEmittingPolicyFinal> policy = + std::make_unique<remark::RemarkEmittingPolicyFinal>(); + LogicalResult isEnabled = remark::enableOptimizationRemarks( + context, std::make_unique<MyCustomStreamer>(), std::move(policy), cats, + true); + ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; + + // Remark 1: failure + remark::failed( + loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << pass1Msg; + + // Remark 2: failure + remark::missed( + loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << remark::reason(pass2Msg); + + // Remark 3: pass + remark::passed( + loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << pass3Msg; + + // Remark 4: pass + remark::passed( + locOther, + remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << pass4Msg; + } + + llvm::errs().flush(); + std::string errOut = ::testing::internal::GetCapturedStderr(); + + // Containment checks for messages. + EXPECT_EQ(errOut.find(pass1Msg), std::string::npos); // dropped + EXPECT_EQ(errOut.find(pass2Msg), std::string::npos); // dropped + EXPECT_NE(errOut.find(pass3Msg), std::string::npos); // shown + EXPECT_NE(errOut.find(pass4Msg), std::string::npos); // shown +} } // namespace diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py index f80a181..3712a6b 100755 --- a/mlir/utils/generate-test-checks.py +++ b/mlir/utils/generate-test-checks.py @@ -31,13 +31,16 @@ import argparse import os # Used to advertise this file's name ("autogenerated_note"). import re import sys +from collections import Counter ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by " ADVERT_END = """ -// The script is designed to make adding checks to -// a test case fast, it is *not* designed to be authoritative -// about what constitutes a good test! The CHECK should be -// minimized and named to reflect the test intent. +// This script is intended to make adding checks to a test case quick and easy. +// It is *not* authoritative about what constitutes a good test. After using the +// script, be sure to review and refine the generated checks. 
For example, +// CHECK lines should be minimized and named to reflect the test’s intent. +// For comprehensive guidelines, see: +// * https://mlir.llvm.org/getting_started/TestingGuide/ """ @@ -45,6 +48,9 @@ ADVERT_END = """ SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*" SSA_RE = re.compile(SSA_RE_STR) +# Regex matching `dialect.op_name` (e.g. `vector.transfer_read`). +SSA_OP_NAME_RE = re.compile(r"\b(?:\s=\s[a-z_]+)[.]([a-z_]+)\b") + # Regex matching the left-hand side of an assignment SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*=' SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR) @@ -63,7 +69,12 @@ ATTR_DEF_RE = re.compile(ATTR_DEF_RE_STR) class VariableNamer: def __init__(self, variable_names): self.scopes = [] +        # Counter for generic FileCheck names, e.g. VAL_#N self.name_counter = 0 +        # Counters for FileCheck names derived from Op names, e.g. +        # TRANSFER_READ_#N (based on `vector.transfer_read`). Note that there's a +        # dedicated counter for every Op type present in the input. +        self.op_name_counter = Counter() # Number of variable names to still generate in parent scope self.generate_in_parent_scope_left = 0 @@ -77,17 +88,29 @@ class VariableNamer: self.generate_in_parent_scope_left = n # Generate a substitution name for the given ssa value name. -    def generate_name(self, source_variable_name, use_ssa_name): +    def generate_name(self, source_variable_name, use_ssa_name, op_name=""): # Compute variable name -        variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else '' -        if variable_name == '': +        variable_name = ( +            self.variable_names.pop(0) if len(self.variable_names) > 0 else "" +        ) +        if variable_name == "": # If `use_ssa_name` is set, use the MLIR SSA value name to generate # a FileCheck substitution string. As FileCheck requires these # strings to start with a character, skip MLIR variables starting # with a digit (e.g. `%0`). +            # +            # The next fallback option is to use the op name, if the +            # corresponding match succeeds. +            # +            # If neither worked, use a generic name: `VAL_#N`. if use_ssa_name and source_variable_name[0].isalpha(): variable_name = source_variable_name.upper() +            elif op_name != "": +                variable_name = ( +                    op_name.upper() + "_" + str(self.op_name_counter[op_name]) +                ) +                self.op_name_counter[op_name] += 1 else: variable_name = "VAL_" + str(self.name_counter) self.name_counter += 1 @@ -123,6 +146,7 @@ class VariableNamer: def clear_names(self): self.name_counter = 0 self.used_variable_names = set() +        self.op_name_counter.clear() class AttributeNamer: @@ -170,8 +194,12 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re # Process the rest that contained an SSA value name. for chunk in line_chunks: -        m = SSA_RE.match(chunk) -        ssa_name = m.group(0) if m is not None else '' +        ssa = SSA_RE.match(chunk) +        op_name_with_dialect = SSA_OP_NAME_RE.search(chunk) +        ssa_name = ssa.group(0) if ssa is not None else "" +        op_name = ( +            op_name_with_dialect.group(1) if op_name_with_dialect is not None else "" +        ) # Check if an existing variable exists for this name. variable = None @@ -185,7 +213,7 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re output_line += "%[[" + variable + "]]" else: # Otherwise, generate a new variable. -            variable = variable_namer.generate_name(ssa_name, use_ssa_name) +            variable = variable_namer.generate_name(ssa_name, use_ssa_name, op_name) if strict_name_re: # Use stricter regexp for the variable name, if requested. # Greedy matching may cause issues with the generic '.*'
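Concretely, with the op-name fallback above, an input line such as `%0 = vector.transfer_read ...` (whose `%0` cannot seed a FileCheck name) now receives a substitution named `TRANSFER_READ_0`, with an independent counter per op type (`TRANSFER_READ_1` for the next such op, and so on), rather than the generic `VAL_#N`. The names here are illustrative; the exact CHECK lines emitted still depend on the script's other options.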
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td index ac27d85..b472236 100644 --- a/offload/liboffload/API/Common.td +++ b/offload/liboffload/API/Common.td @@ -140,9 +140,10 @@ def ol_dimensions_t : Struct { } def olInit : Function { -  let desc = "Perform initialization of the Offload library and plugins"; +  let desc = "Perform initialization of the Offload library"; let details = [ "This must be the first API call made by a user of the Offload library", +    "The underlying platforms are lazily initialized on their first use", "Each call will increment an internal reference count that is decremented by `olShutDown`" ]; let params = []; diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index c549ae0..6d22fae 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -42,9 +42,7 @@ using namespace error; struct ol_platform_impl_t { ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin, ol_platform_backend_t BackendType) -      : Plugin(std::move(Plugin)), BackendType(BackendType) {} -  std::unique_ptr<GenericPluginTy> Plugin; -  llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices; +      : BackendType(BackendType), Plugin(std::move(Plugin)) {} ol_platform_backend_t BackendType; /// Complete all pending work for this platform and perform any needed @@ -53,6 +51,14 @@ /// After calling this function, no liboffload functions should be called with /// this platform handle. llvm::Error destroy(); + +  /// Initialize the associated plugin and devices. +  llvm::Error init(); + +  /// Direct access to the plugin; it may still be uninitialized when accessed here. +  std::unique_ptr<GenericPluginTy> Plugin; + +  llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices; }; // Handle type definitions. Ideally these would be 1:1 with the plugins, but @@ -130,6 +136,28 @@ llvm::Error ol_platform_impl_t::destroy() { return Result; } +llvm::Error ol_platform_impl_t::init() { +  if (!Plugin) +    return llvm::Error::success(); + +  if (llvm::Error Err = Plugin->init()) +    return Err; + +  for (auto Id = 0, End = Plugin->getNumDevices(); Id != End; Id++) { +    if (llvm::Error Err = Plugin->initDevice(Id)) +      return Err; + +    auto Device = &Plugin->getDevice(Id); +    auto Info = Device->obtainInfoImpl(); +    if (llvm::Error Err = Info.takeError()) +      return Err; +    Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this, +                                                            std::move(*Info))); +  } + +  return llvm::Error::success(); +} + struct ol_queue_impl_t { ol_queue_impl_t(__tgt_async_info *AsyncInfo, ol_device_handle_t Device) : AsyncInfo(AsyncInfo), Device(Device), Id(IdCounter++) {} @@ -207,15 +235,11 @@ struct OffloadContext { std::mutex AllocInfoMapMutex{}; // Partitioned list of memory base addresses.
Each element in this list is a // key in AllocInfoMap - llvm::SmallVector<void *> AllocBases{}; + SmallVector<void *> AllocBases{}; SmallVector<std::unique_ptr<ol_platform_impl_t>, 4> Platforms{}; + ol_device_handle_t HostDevice; size_t RefCount; - ol_device_handle_t HostDevice() { - // The host platform is always inserted last - return Platforms.back()->Devices[0].get(); - } - static OffloadContext &get() { assert(OffloadContextVal); return *OffloadContextVal; @@ -259,28 +283,21 @@ Error initPlugins(OffloadContext &Context) { } while (false); #include "Shared/Targets.def" - // Preemptively initialize all devices in the plugin + // Eagerly initialize all of the plugins and devices. We need to make sure + // that the platform is initialized at a consistent point to maintain the + // expected teardown order in the vendor libraries. for (auto &Platform : Context.Platforms) { - auto Err = Platform->Plugin->init(); - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - for (auto DevNum = 0; DevNum < Platform->Plugin->number_of_devices(); - DevNum++) { - if (Platform->Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) { - auto Device = &Platform->Plugin->getDevice(DevNum); - auto Info = Device->obtainInfoImpl(); - if (auto Err = Info.takeError()) - return Err; - Platform->Devices.emplace_back(std::make_unique<ol_device_impl_t>( - DevNum, Device, *Platform, std::move(*Info))); - } - } + if (Error Err = Platform->init()) + return Err; } - // Add the special host device + // Add the special host device. auto &HostPlatform = Context.Platforms.emplace_back( std::make_unique<ol_platform_impl_t>(nullptr, OL_PLATFORM_BACKEND_HOST)); - HostPlatform->Devices.emplace_back(std::make_unique<ol_device_impl_t>( - -1, nullptr, *HostPlatform, InfoTreeNode{})); + Context.HostDevice = HostPlatform->Devices + .emplace_back(std::make_unique<ol_device_impl_t>( + -1, nullptr, *HostPlatform, InfoTreeNode{})) + .get(); Context.TracingEnabled = std::getenv("OFFLOAD_TRACE"); Context.ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION"); @@ -312,16 +329,16 @@ Error olShutDown_impl() { if (--OffloadContext::get().RefCount != 0) return Error::success(); - llvm::Error Result = Error::success(); + Error Result = Error::success(); auto *OldContext = OffloadContextVal.exchange(nullptr); - for (auto &P : OldContext->Platforms) { + for (auto &Platform : OldContext->Platforms) { // Host plugin is nullptr and has no deinit - if (!P->Plugin || !P->Plugin->is_initialized()) + if (!Platform->Plugin || !Platform->Plugin->is_initialized()) continue; - if (auto Res = P->destroy()) - Result = llvm::joinErrors(std::move(Result), std::move(Res)); + if (auto Res = Platform->destroy()) + Result = joinErrors(std::move(Result), std::move(Res)); } delete OldContext; @@ -334,6 +351,8 @@ Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform, InfoWriter Info(PropSize, PropValue, PropSizeRet); bool IsHost = Platform->BackendType == OL_PLATFORM_BACKEND_HOST; + // Note that the plugin is potentially uninitialized here. It will need to be + // initialized once info is added that requires it to be initialized. switch (PropName) { case OL_PLATFORM_INFO_NAME: return Info.writeString(IsHost ? 
"Host" : Platform->Plugin->getName()); @@ -373,12 +392,12 @@ Error olGetPlatformInfoSize_impl(ol_platform_handle_t Platform, Error olGetDeviceInfoImplDetail(ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { - assert(Device != OffloadContext::get().HostDevice()); + assert(Device != OffloadContext::get().HostDevice); InfoWriter Info(PropSize, PropValue, PropSizeRet); auto makeError = [&](ErrorCode Code, StringRef Err) { std::string ErrBuffer; - llvm::raw_string_ostream(ErrBuffer) << PropName << ": " << Err; + raw_string_ostream(ErrBuffer) << PropName << ": " << Err; return Plugin::error(ErrorCode::UNIMPLEMENTED, ErrBuffer.c_str()); }; @@ -511,7 +530,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device, Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { - assert(Device == OffloadContext::get().HostDevice()); + assert(Device == OffloadContext::get().HostDevice); InfoWriter Info(PropSize, PropValue, PropSizeRet); constexpr auto uint32_max = std::numeric_limits<uint32_t>::max(); @@ -579,7 +598,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device, Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, void *PropValue) { - if (Device == OffloadContext::get().HostDevice()) + if (Device == OffloadContext::get().HostDevice) return olGetDeviceInfoImplDetailHost(Device, PropName, PropSize, PropValue, nullptr); return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue, @@ -588,7 +607,7 @@ Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName, Error olGetDeviceInfoSize_impl(ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet) { - if (Device == OffloadContext::get().HostDevice()) + if (Device == OffloadContext::get().HostDevice) return olGetDeviceInfoImplDetailHost(Device, PropName, 0, nullptr, PropSizeRet); return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet); @@ -598,7 +617,7 @@ Error olIterateDevices_impl(ol_device_iterate_cb_t Callback, void *UserData) { for (auto &Platform : OffloadContext::get().Platforms) { for (auto &Device : Platform->Devices) { if (!Callback(Device.get(), UserData)) { - break; + return Error::success(); } } } @@ -949,7 +968,7 @@ Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) { Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice, const void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size) { - auto Host = OffloadContext::get().HostDevice(); + auto Host = OffloadContext::get().HostDevice; if (DstDevice == Host && SrcDevice == Host) { if (!Queue) { std::memcpy(DstPtr, SrcPtr, Size); @@ -1138,7 +1157,7 @@ Error olGetSymbolInfoImplDetail(ol_symbol_handle_t Symbol, auto CheckKind = [&](ol_symbol_kind_t Required) { if (Symbol->Kind != Required) { std::string ErrBuffer; - llvm::raw_string_ostream(ErrBuffer) + raw_string_ostream(ErrBuffer) << PropName << ": Expected a symbol of Kind " << Required << " but given a symbol of Kind " << Symbol->Kind; return Plugin::error(ErrorCode::SYMBOL_KIND, ErrBuffer.c_str()); diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 61c30b8..c83ab59 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5227,6 +5227,7 @@ cc_library( ":IR", 
":InferTypeOpInterface", ":InliningUtils", + ":LLVMDialectBytecodeIncGen", ":LLVMDialectInterfaceIncGen", ":LLVMIntrinsicOpsIncGen", ":LLVMOpsIncGen", @@ -5679,6 +5680,16 @@ td_library( ], ) +td_library( + name = "LLVMDialectBytecodeTdFiles", + srcs = ["include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td"], + includes = ["include"], + deps = [ + ":BytecodeTdFiles", + ":LLVMOpsTdFiles", + ], +) + cc_library( name = "GPUCommonTransforms", hdrs = [ @@ -6032,6 +6043,17 @@ gentbl_cc_library( ) gentbl_cc_library( + name = "LLVMDialectBytecodeIncGen", + tbl_outs = {"include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.cpp.inc": [ + "-gen-bytecode", + "-bytecode-dialect=LLVM", + ]}, + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td", + deps = [":LLVMDialectBytecodeTdFiles"], +) + +gentbl_cc_library( name = "LLVMTypesIncGen", tbl_outs = { "include/mlir/Dialect/LLVMIR/LLVMTypes.h.inc": [ |