376 files changed, 17832 insertions, 2209 deletions
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py index 5026c29..36c9585 100644 --- a/.ci/generate_test_report_lib.py +++ b/.ci/generate_test_report_lib.py @@ -98,6 +98,23 @@ def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]: ) return output +def get_failures(junit_objects) -> dict[str, list[tuple[str, str]]]: + failures = {} + for results in junit_objects: + for testsuite in results: + for test in testsuite: + if ( + not test.is_passed + and test.result + and isinstance(test.result[0], Failure) + ): + if failures.get(testsuite.name) is None: + failures[testsuite.name] = [] + failures[testsuite.name].append( + (test.classname + "/" + test.name, test.result[0].text) + ) + return failures + # Set size_limit to limit the byte size of the report. The default is 1MB as this # is the most that can be put into an annotation. If the generated report exceeds @@ -113,7 +130,7 @@ def generate_report( size_limit=1024 * 1024, list_failures=True, ): - failures = {} + failures = get_failures(junit_objects) tests_run = 0 tests_skipped = 0 tests_failed = 0 @@ -124,18 +141,6 @@ def generate_report( tests_skipped += testsuite.skipped tests_failed += testsuite.failures - for test in testsuite: - if ( - not test.is_passed - and test.result - and isinstance(test.result[0], Failure) - ): - if failures.get(testsuite.name) is None: - failures[testsuite.name] = [] - failures[testsuite.name].append( - (test.classname + "/" + test.name, test.result[0].text) - ) - report = [f"# {title}", ""] if tests_run == 0: @@ -258,7 +263,7 @@ def generate_report( return report -def generate_report_from_files(title, return_code, build_log_files): +def load_info_from_files(build_log_files): junit_files = [ junit_file for junit_file in build_log_files if junit_file.endswith(".xml") ] @@ -271,6 +276,9 @@ def generate_report_from_files(title, return_code, build_log_files): ninja_logs.append( [log_line.strip() for log_line in ninja_log_file_handle.readlines()] ) - return generate_report( - title, return_code, [JUnitXml.fromfile(p) for p in junit_files], ninja_logs - ) + return [JUnitXml.fromfile(p) for p in junit_files], ninja_logs + + +def generate_report_from_files(title, return_code, build_log_files): + junit_objects, ninja_logs = load_info_from_files(build_log_files) + return generate_report(title, return_code, junit_objects, ninja_logs) diff --git a/.github/workflows/containers/github-action-ci-windows/Dockerfile b/.github/workflows/containers/github-action-ci-windows/Dockerfile index 640d34d..9ddf501 100644 --- a/.github/workflows/containers/github-action-ci-windows/Dockerfile +++ b/.github/workflows/containers/github-action-ci-windows/Dockerfile @@ -90,7 +90,7 @@ RUN powershell -Command \ RUN git config --system core.longpaths true & \ git config --global core.autocrlf false -ARG RUNNER_VERSION=2.328.0 +ARG RUNNER_VERSION=2.329.0 ENV RUNNER_VERSION=$RUNNER_VERSION RUN powershell -Command \ diff --git a/.github/workflows/containers/github-action-ci/Dockerfile b/.github/workflows/containers/github-action-ci/Dockerfile index 4540d64..1b376dd 100644 --- a/.github/workflows/containers/github-action-ci/Dockerfile +++ b/.github/workflows/containers/github-action-ci/Dockerfile @@ -99,7 +99,7 @@ WORKDIR /home/gha FROM ci-container AS ci-container-agent -ENV GITHUB_RUNNER_VERSION=2.328.0 +ENV GITHUB_RUNNER_VERSION=2.329.0 RUN mkdir actions-runner && \ cd actions-runner && \ diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 
1e0dc70..2b85d8b 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -12,6 +12,8 @@ on: jobs: code_formatter: runs-on: ubuntu-24.04 + container: + image: 'ghcr.io/llvm/ci-ubuntu-24.04-format' timeout-minutes: 30 concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number }} @@ -23,6 +25,14 @@ jobs: with: fetch-depth: 2 + # We need to set the repo checkout as safe, otherwise tj-actions/changed-files + # will fail due to the changed ownership inside the container. + # TODO(boomanaiden154): We should probably fix this by having the default user + # in the container have the same ID as the GHA user on the host. + - name: Set Safe Directory + run: | + chown -R root $(pwd) + - name: Get changed files id: changed-files uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5 @@ -39,24 +49,6 @@ jobs: echo "Formatting files:" echo "$CHANGED_FILES" - # The clang format version should always be upgraded to the first version - # of a release cycle (x.1.0) or the last version of a release cycle, or - # if there have been relevant clang-format backports. - - name: Install clang-format - uses: aminya/setup-cpp@a276e6e3d1db9160db5edc458e99a30d3b109949 # v1.7.1 - with: - clangformat: 21.1.0 - - - name: Setup Python env - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.13' - cache: 'pip' - cache-dependency-path: 'llvm/utils/git/requirements_formatting.txt' - - - name: Install python dependencies - run: pip install -r llvm/utils/git/requirements_formatting.txt - - name: Run code formatter env: GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }} diff --git a/bolt/docs/BinaryAnalysis.md b/bolt/docs/BinaryAnalysis.md index b13410c..07f096e 100644 --- a/bolt/docs/BinaryAnalysis.md +++ b/bolt/docs/BinaryAnalysis.md @@ -1,7 +1,7 @@ # BOLT-based binary analysis As part of post-link-time optimizing, BOLT needs to perform a range of analyses -on binaries such as recontructing control flow graphs, and more. +on binaries such as reconstructing control flow graphs, and more. The `llvm-bolt-binary-analysis` tool enables running requested binary analyses on binaries, and generating reports. It does this by building on top of the diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 151399d..43cecee 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -375,7 +375,7 @@ - `--use-old-text` - Re-use space in old .text if possible (relocation mode) + Reuse space in old .text if possible (relocation mode) - `-v <uint>` diff --git a/bolt/docs/RuntimeLibrary.md b/bolt/docs/RuntimeLibrary.md index 58d9497..b969ebd 100644 --- a/bolt/docs/RuntimeLibrary.md +++ b/bolt/docs/RuntimeLibrary.md @@ -15,7 +15,7 @@ However, this approach quickly becomes awkward if we want to insert a lot of cod Currently, our runtime library is written in C++ and contains code that helps us instrument a binary. ### Limitations -Our library is not written with regular C++ code as it is not linked against any other libraries (this means we cannnot rely on anything defined on libstdc++, glibc, libgcc etc), but is self sufficient. In runtime/CMakeLists.txt, we can see it is built with -ffreestanding, which requires the compiler to avoid using a runtime library by itself. 
+Our library is not written with regular C++ code as it is not linked against any other libraries (this means we cannot rely on anything defined in libstdc++, glibc, libgcc, etc.), but is self-sufficient. In runtime/CMakeLists.txt, we can see it is built with -ffreestanding, which requires the compiler to avoid using a runtime library by itself. While this requires us to make our own syscalls, it does simplify our linker a lot, which is very limited and can only do basic function name resolving. However, this is a big improvement in comparison with programmatically generating the code in assembly language using MCInsts. diff --git a/bolt/docs/doxygen.cfg.in b/bolt/docs/doxygen.cfg.in index 538285f..de8b1f7b 100644 --- a/bolt/docs/doxygen.cfg.in +++ b/bolt/docs/doxygen.cfg.in @@ -1070,7 +1070,7 @@ HTML_STYLESHEET = # defined cascading style sheet that is included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefor more robust against future updates. +# standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet file to the output directory. For an example # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index f5e9887..7b10b2d 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1336,7 +1336,7 @@ public: ColdCodeSectionName = Name.str(); } - /// Return true iif the function will halt execution on entry. + /// Return true if the function will halt execution on entry. bool trapsOnEntry() const { return TrapsOnEntry; } /// Make the function always trap on entry. Other than the trap instruction, diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index 4c3c277..95e958f 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -60,7 +60,7 @@ public: uint32_t UnitLength = 0; bool IsConstructed = false; // A map of DIE offsets in original DWARF section to DIE ID. - // Whih is used to access DieInfoVector. + // Which is used to access DieInfoVector. std::unordered_map<uint64_t, uint32_t> DIEIDMap; // Some STL implementations don't have a noexcept move constructor for diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 8149789..7c8ea12ee 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -326,8 +326,8 @@ public: /// Write out entries in to .debug_addr section for CUs. virtual std::optional<uint64_t> finalize(const size_t BufferSize); - /// Return buffer with all the entries in .debug_addr already writen out using - /// update(...). + /// Return buffer with all the entries in .debug_addr already written out + /// using update(...). virtual std::unique_ptr<AddressSectionBuffer> releaseBuffer() { return std::move(Buffer); } @@ -409,7 +409,7 @@ protected: std::mutex WriterMutex; std::unique_ptr<AddressSectionBuffer> Buffer; std::unique_ptr<raw_svector_ostream> AddressStream; - /// Used to track sections that were not modified so that they can be re-used. + /// Used to track sections that were not modified so that they can be reused.
static DenseMap<uint64_t, uint64_t> UnmodifiedAddressOffsets; }; diff --git a/bolt/include/bolt/Core/DebugNames.h b/bolt/include/bolt/Core/DebugNames.h index cc4e13a..4ec49ca 100644 --- a/bolt/include/bolt/Core/DebugNames.h +++ b/bolt/include/bolt/Core/DebugNames.h @@ -65,7 +65,7 @@ public: void setCurrentUnit(DWARFUnit &Unit, const uint64_t UnitStartOffset); /// Emit Accelerator table. void emitAccelTable(); - /// Returns true if the table was crated. + /// Returns true if the table was created. bool isCreated() const { return NeedToCreate; } /// Returns buffer containing the accelerator table. std::unique_ptr<DebugBufferVector> releaseBuffer() { @@ -91,7 +91,7 @@ private: uint64_t CurrentUnitOffset = 0; const DWARFUnit *CurrentUnit = nullptr; std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap; - /// Contains a map of TU hashes to a Foreign TU indecies. + /// Contains a map of TU hashes to Foreign TU indices. /// This is used to reduce the size of Foreign TU list since there could be /// multiple TUs with the same hash. DenseMap<uint64_t, uint32_t> TUHashToIndexMap; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 2772de7..d666c10 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -432,7 +432,7 @@ public: return Analysis->isConditionalBranch(Inst); } - /// Returns true if Inst is a condtional move instruction + /// Returns true if Inst is a conditional move instruction virtual bool isConditionalMove(const MCInst &Inst) const { llvm_unreachable("not implemented"); return false; } @@ -1564,7 +1564,7 @@ public: } /// Get the default def_in and live_out registers for the function - /// Currently only used for the Stoke optimzation + /// Currently only used for the Stoke optimization virtual void getDefaultDefIn(BitVector &Regs) const { llvm_unreachable("not implemented"); } diff --git a/bolt/include/bolt/Passes/FrameAnalysis.h b/bolt/include/bolt/Passes/FrameAnalysis.h index d71c338..5ce85be 100644 --- a/bolt/include/bolt/Passes/FrameAnalysis.h +++ b/bolt/include/bolt/Passes/FrameAnalysis.h @@ -37,7 +37,7 @@ struct FrameIndexEntry { int64_t StackOffset; uint8_t Size; - /// If this is false, we will never atempt to remove or optimize this + /// If this is false, we will never attempt to remove or optimize this /// instruction. We just use it to keep track of stores we don't fully /// understand but we know it may write to a frame position. bool IsSimple; diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h index df3ea96..84da453 100644 --- a/bolt/include/bolt/Passes/LongJmp.h +++ b/bolt/include/bolt/Passes/LongJmp.h @@ -30,7 +30,7 @@ namespace bolt { /// 64-bit range, we guarantee it can reach any code location.
/// class LongJmpPass : public BinaryFunctionPass { - /// Used to implement stub grouping (re-using a stub from one function into + /// Used to implement stub grouping (reusing a stub from one function into /// another) using StubTy = std::pair<uint64_t, BinaryBasicBlock *>; using StubGroupTy = SmallVector<StubTy, 4>; diff --git a/bolt/include/bolt/Passes/ProfileQualityStats.h b/bolt/include/bolt/Passes/ProfileQualityStats.h index 86fc88c..ee74b12 100644 --- a/bolt/include/bolt/Passes/ProfileQualityStats.h +++ b/bolt/include/bolt/Passes/ProfileQualityStats.h @@ -49,7 +49,7 @@ // aggregates the block gaps into 2 values for the function: "weighted" is the // weighted average of the block conservation gaps, where the weights depend on // each block's execution count and instruction count; "worst" is the worst -// (biggest) block gap acorss all basic blocks in the function with an execution +// (biggest) block gap across all basic blocks in the function with an execution // count of > 500. The pass then reports the 95th percentile of the weighted and // worst values of the 1000 functions in a single BOLT-INFO line. The smaller // the reported values are, the better the BOLT profile satisfies the function diff --git a/bolt/include/bolt/Passes/ReorderAlgorithm.h b/bolt/include/bolt/Passes/ReorderAlgorithm.h index 95d9e83..42bb333 100644 --- a/bolt/include/bolt/Passes/ReorderAlgorithm.h +++ b/bolt/include/bolt/Passes/ReorderAlgorithm.h @@ -26,7 +26,7 @@ namespace bolt { /// Objects of this class implement various basic block clustering algorithms. /// Basic block clusters are chains of basic blocks that should be laid out -/// in this order to maximize performace. These algorithms group basic blocks +/// in this order to maximize performance. These algorithms group basic blocks /// into clusters using execution profile data and various heuristics. class ClusterAlgorithm { public: diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h index 6242456..cab346b 100644 --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -128,7 +128,7 @@ private: CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer, GDBIndex &GDBIndexSection); - /// Process and write out CUs that are passsed in. + /// Process and write out CUs that are passed in. void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer, CUOffsetMap &CUMap, const std::list<DWARFUnit *> &CUs, diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 206d8eef..46b3372 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -844,7 +844,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, auto isSibling = std::bind(&BinaryContext::areRelatedFragments, this, &Function, std::placeholders::_1); assert(llvm::all_of(JT->Parents, isSibling) && - "cannot re-use jump table of a different function"); + "cannot reuse jump table of a different function"); (void)isSibling; if (opts::Verbosity > 2) { this->outs() << "BOLT-INFO: multiple fragments access the same jump table" @@ -860,7 +860,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, return JT->getFirstLabel(); } - // Re-use the existing symbol if possible. + // Reuse the existing symbol if possible. 
MCSymbol *JTLabel = nullptr; if (BinaryData *Object = getBinaryDataAtAddress(Address)) { if (!isInternalSymbolName(Object->getSymbol()->getName())) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 9687892..4dfd4ba 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3875,7 +3875,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { if (FunctionSymbol == Symbol) return 0; - // Check all secondary entries available as either basic blocks or lables. + // Check all secondary entries available as either basic blocks or labels. uint64_t NumEntries = 1; for (const BinaryBasicBlock *BB : BasicBlocks) { MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB); diff --git a/bolt/lib/Core/BinaryFunctionCallGraph.cpp b/bolt/lib/Core/BinaryFunctionCallGraph.cpp index f0c46a8..af22419 100644 --- a/bolt/lib/Core/BinaryFunctionCallGraph.cpp +++ b/bolt/lib/Core/BinaryFunctionCallGraph.cpp @@ -122,7 +122,7 @@ buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData, // create a node for a function unless it was the target of a call from // a hot block. The alternative would be to set the count to one or // accumulate the number of calls from the callsite into the function - // samples. Results from perfomance testing seem to favor the zero + // samples. Results from performance testing seem to favor the zero // count though, so I'm leaving it this way for now. return Cg.addNode(Function, Size, Function->getKnownExecutionCount()); } diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp index 7ce55f9..5b628f6 100644 --- a/bolt/lib/Core/DIEBuilder.cpp +++ b/bolt/lib/Core/DIEBuilder.cpp @@ -137,7 +137,7 @@ void DIEBuilder::updateReferences() { DIEInteger(NewAddr)); } - // Handling referenes in location expressions. + // Handling references in location expressions. for (LocWithReference &LocExpr : getState().LocWithReferencesToProcess) { SmallVector<uint8_t, 32> Buffer; DataExtractor Data(StringRef((const char *)LocExpr.BlockData.data(), @@ -336,7 +336,7 @@ void DIEBuilder::buildCompileUnits(const bool Init) { registerUnit(*DU, false); } - // Using DULIst since it can be modified by cross CU refrence resolution. + // Using DUList since it can be modified by cross CU reference resolution. for (DWARFUnit *DU : getState().DUList) { if (DU->isTypeUnit()) continue; @@ -508,7 +508,7 @@ void DIEBuilder::finish() { UnitStartOffset += CurUnitInfo.UnitLength; }; // Computing offsets for .debug_types section. - // It's processed first when CU is registered so will be at the begginnig of + // It's processed first when CU is registered so will be at the beginning of // the vector. uint64_t TypeUnitStartOffset = 0; for (DWARFUnit *CU : getState().DUList) { diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index e05f28f..24a4c85 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -876,7 +876,7 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit, DIEValue StrListBaseAttrInfo = Die.findAttribute(dwarf::DW_AT_str_offsets_base); auto RetVal = ProcessedBaseOffsets.find(*Val); - // Handling re-use of str-offsets section. + // Handling reuse of str-offsets section. if (RetVal == ProcessedBaseOffsets.end() || StrOffsetSectionWasModified) { initialize(Unit); // Update String Offsets that were modified.
@@ -1167,7 +1167,7 @@ void DwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params, // For functions that we do not modify we output them as raw data. // Re-constructing .debug_line_str so that offsets are correct for those // debug line tables. -// Bonus is that when we output a final binary we can re-use .debug_line_str +// Bonus is that when we output a final binary we can reuse .debug_line_str // section. So we don't have to do the SHF_ALLOC trick we did with // .debug_line. static void parseAndPopulateDebugLineStr(BinarySection &LineStrSection, diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index a9d98a6..6be2c5a 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -55,7 +55,7 @@ DWARF5AcceleratorTable::DWARF5AcceleratorTable( llvm::hash_value(llvm::StringRef(CStr)), StrOffset); if (!R.second) BC.errs() - << "BOLT-WARNING: [internal-dwarf-error]: collision occured on " + << "BOLT-WARNING: [internal-dwarf-error]: collision occurred on " << CStr << " at offset : 0x" << Twine::utohexstr(StrOffset) << ". Previous string offset is: 0x" << Twine::utohexstr(R.first->second) << ".\n"; @@ -86,7 +86,7 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit, if (Unit.isTypeUnit()) { if (DWOID) { // We adding an entry for a DWO TU. The DWO CU might not have any entries, - // so need to add it to the list pre-emptively. + // so need to add it to the list preemptively. auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()}); if (Iter.second) CUList.push_back(BADCUOFFSET); diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp index c3ddeda..5d21bdb 100644 --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -60,7 +60,7 @@ namespace llvm { namespace bolt { // Align function to the specified byte-boundary (typically, 64) offsetting -// the fuction by not more than the corresponding value +// the function by not more than the corresponding value static void alignMaxBytes(BinaryFunction &Function) { Function.setAlignment(opts::AlignFunctions); Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); @@ -68,7 +68,7 @@ static void alignMaxBytes(BinaryFunction &Function) { } // Align function to the specified byte-boundary (typically, 64) offsetting -// the fuction by not more than the minimum over +// the function by not more than the minimum over // -- the size of the function // -- the specified number of bytes static void alignCompact(BinaryFunction &Function, diff --git a/bolt/lib/Passes/FrameAnalysis.cpp b/bolt/lib/Passes/FrameAnalysis.cpp index f568039..0b26da3 100644 --- a/bolt/lib/Passes/FrameAnalysis.cpp +++ b/bolt/lib/Passes/FrameAnalysis.cpp @@ -198,7 +198,7 @@ public: if (CFIStack.empty()) dbgs() << "Assertion is about to fail: " << BF.getPrintName() << "\n"; assert(!CFIStack.empty() && "Corrupt CFI stack"); - std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); + std::pair<int64_t, uint16_t> Elem = CFIStack.top(); CFIStack.pop(); CfaOffset = Elem.first; CfaReg = Elem.second; diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp index 60349f1..0859cd2 100644 --- a/bolt/lib/Passes/RegReAssign.cpp +++ b/bolt/lib/Passes/RegReAssign.cpp @@ -145,7 +145,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) { const bool CannotUseREX = BC.MIB->cannotUseREX(Inst); const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); - // Disallow substituitions involving regs in implicit uses lists + // Disallow substitutions involving regs in implicit uses lists for 
(MCPhysReg ImplicitUse : Desc.implicit_uses()) { const size_t RegEC = BC.MIB->getAliases(ImplicitUse, false).find_first(); @@ -153,7 +153,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) { std::numeric_limits<decltype(RegScore)::value_type>::min(); } - // Disallow substituitions involving regs in implicit defs lists + // Disallow substitutions involving regs in implicit defs lists for (MCPhysReg ImplicitDef : Desc.implicit_defs()) { const size_t RegEC = BC.MIB->getAliases(ImplicitDef, false).find_first(); @@ -174,7 +174,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) { if (RegEC == 0) continue; - // Disallow substituitions involving regs in instrs that cannot use REX + // Disallow substitutions involving regs in instrs that cannot use REX // The relationship of X86 registers is shown in the diagram. BL and BH // do not have a direct alias relationship. However, if the BH register // cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp index 4ea60f3..fe342cc 100644 --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -402,7 +402,7 @@ void StackLayoutModifier::classifyCFIs() { break; case MCCFIInstruction::OpRestoreState: { assert(!CFIStack.empty() && "Corrupt CFI stack"); - std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); + std::pair<int64_t, uint16_t> Elem = CFIStack.top(); CFIStack.pop(); CfaOffset = Elem.first; CfaReg = Elem.second; diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp index eab669b..66a373a 100644 --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -386,7 +386,7 @@ private: } /// Compute sum of scores over jumps within \p BlockOrder given \p SplitIndex. - /// Increament Score.LocalScore in place by the sum. + /// Increment Score.LocalScore in place by the sum. void computeJumpScore(const BasicBlockOrder &BlockOrder, const size_t SplitIndex, SplitScore &Score) { @@ -413,7 +413,7 @@ private: } /// Compute sum of scores over calls originated in the current function - /// given \p SplitIndex. Increament Score.LocalScore in place by the sum. + /// given \p SplitIndex. Increment Score.LocalScore in place by the sum. void computeLocalCallScore(const BasicBlockOrder &BlockOrder, const size_t SplitIndex, SplitScore &Score) { if (opts::CallScale == 0) @@ -455,7 +455,7 @@ private: } /// Compute sum of splitting scores for cover calls of the input function. - /// Increament Score.CoverCallScore in place by the sum. + /// Increment Score.CoverCallScore in place by the sum. void computeCoverCallScore(const BasicBlockOrder &BlockOrder, const size_t SplitIndex, const std::vector<CallInfo> &CoverCalls, @@ -467,7 +467,7 @@ private: assert(CI.Length >= Score.HotSizeReduction && "Length of cover calls must exceed reduced size of hot fragment."); // Compute the new length of the call, which is shorter than the original - // one by the size of the splitted fragment minus the total size increase. + // one by the size of the split fragment minus the total size increase. const size_t NewCallLength = CI.Length - Score.HotSizeReduction; Score.CoverCallScore += computeCallScore(CI.Count, NewCallLength); } @@ -502,12 +502,12 @@ private: // First part of LocalScore is the sum over call edges originated in the // input function. These edges can get shorter or longer depending on - // SplitIndex. Score.LocalScore is increamented in place. + // SplitIndex. 
Score.LocalScore is incremented in place. computeLocalCallScore(BlockOrder, SplitIndex, Score); // Second part of LocalScore is the sum over jump edges with src basic block // and dst basic block in the current function. Score.LocalScore is - // increamented in place. + // incremented in place. computeJumpScore(BlockOrder, SplitIndex, Score); // Compute CoverCallScore and store in Score in place. diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index afe2421..277d4bb 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -907,7 +907,7 @@ ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) { StringRef NumStr = NumStrRes.get(); uint64_t Num; if (NumStr.getAsInteger(16, Num)) { - reportError("expected hexidecimal number"); + reportError("expected hexadecimal number"); Diag << "Found: " << NumStr << "\n"; return make_error_code(llvm::errc::io_error); } diff --git a/bolt/lib/Rewrite/BuildIDRewriter.cpp b/bolt/lib/Rewrite/BuildIDRewriter.cpp index d50416f..706a3d0 100644 --- a/bolt/lib/Rewrite/BuildIDRewriter.cpp +++ b/bolt/lib/Rewrite/BuildIDRewriter.cpp @@ -48,7 +48,7 @@ public: }; Error BuildIDRewriter::sectionInitializer() { - // Typically, build ID will reside in .note.gnu.build-id section. Howerver, + // Typically, build ID will reside in .note.gnu.build-id section. However, // a linker script can change the section name and such is the case with // the Linux kernel. Hence, we iterate over all note sections. for (BinarySection &NoteSection : BC.sections()) { diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 7366d2a..a7a4b66 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -1723,7 +1723,7 @@ StringRef getSectionName(const SectionRef &Section) { return Name; } -// Exctracts an appropriate slice if input is DWP. +// Extracts an appropriate slice if input is DWP. // Applies patches or overwrites the section. std::optional<StringRef> updateDebugData( DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents, @@ -1759,7 +1759,7 @@ std::optional<StringRef> updateDebugData( auto Iter = OverridenSections.find(Kind); if (Iter == OverridenSections.end()) { errs() - << "BOLT-WARNING: [internal-dwarf-error]: Could not find overriden " + << "BOLT-WARNING: [internal-dwarf-error]: Could not find overridden " "section for: " << Twine::utohexstr(DWOId) << ".\n"; return std::nullopt; @@ -1991,7 +1991,7 @@ void DWARFRewriter::convertToRangesPatchDebugInfo( } } - // HighPC was conveted into DW_AT_ranges. + // HighPC was converted into DW_AT_ranges. // For DWARF5 we only access ranges through index. DIEBldr.replaceValue(&Die, HighPCAttrInfo.getAttribute(), dwarf::DW_AT_ranges, diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index ee021fe..947d899 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -308,7 +308,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { Contents.append(OSE.str().begin(), OSE.str().end()); }; - // Emit indiviual pseudo probes in a inline tree node + // Emit individual pseudo probes in an inline tree node // Probe index, type, attribute, address type and address are encoded // Address of the first probe is absolute.
// Other probes' address are represented by delta diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a0e7995..9580163 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1087,7 +1087,7 @@ void RewriteInstance::discoverFileObjects() { if (SymbolAddress == Section->getAddress() + Section->getSize()) { assert(SymbolSize == 0 && - "unexpect non-zero sized symbol at end of section"); + "unexpected non-zero sized symbol at end of section"); LLVM_DEBUG( dbgs() << "BOLT-DEBUG: rejecting as symbol points to end of its section\n"); @@ -2440,7 +2440,7 @@ void RewriteInstance::processDynamicRelocations() { } // The rest of dynamic relocations - DT_RELA. - // The static executable might have .rela.dyn secion and not have PT_DYNAMIC + // The static executable might have .rela.dyn section and not have PT_DYNAMIC if (!DynamicRelocationsSize && BC->IsStaticExecutable) { ErrorOr<BinarySection &> DynamicRelSectionOrErr = BC->getUniqueSectionByName(getRelaDynSectionName()); @@ -5017,7 +5017,7 @@ void RewriteInstance::updateELFSymbolTable( if (!Section) return false; - // Remove the section symbol iif the corresponding section was stripped. + // Remove the section symbol if the corresponding section was stripped. if (Symbol.getType() == ELF::STT_SECTION) { if (!getNewSectionIndex(Symbol.st_shndx)) return true; diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index df4f421..6954cb2 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1296,7 +1296,7 @@ public: AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::getArithExtendType(OperandExtension); if (ShiftVal != 2) { - // TODO: Handle the patten where ShiftVal != 2. + // TODO: Handle the pattern where ShiftVal != 2. // The following code sequence below has no shift amount, // the range could be 0 to 4. // The pattern comes from libc, it occurs when the binary is static. @@ -1626,7 +1626,7 @@ public: int getUncondBranchEncodingSize() const override { return 28; } // This helper function creates the snippet of code that compares a register - // RegNo with an immedaite Imm, and jumps to Target if they are equal. + // RegNo with an immediate Imm, and jumps to Target if they are equal. // cmp RegNo, #Imm // b.eq Target // where cmp is an alias for subs, which results in the code below: @@ -1648,7 +1648,7 @@ public: } // This helper function creates the snippet of code that compares a register - // RegNo with an immedaite Imm, and jumps to Target if they are not equal. + // RegNo with an immediate Imm, and jumps to Target if they are not equal. // cmp RegNo, #Imm // b.ne Target // where cmp is an alias for subs, which results in the code below: diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 9026a9d..5fca5e8 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -2715,7 +2715,7 @@ public: bool FoundOne = false; - // Iterate only through src operands that arent also dest operands + // Iterate only through src operands that aren't also dest operands for (unsigned Index = InstDesc.getNumDefs() + (HasLHS ? 
1 : 0), E = InstDesc.getNumOperands(); Index != E; ++Index) { diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 095612a..5be04d2 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -285,7 +285,7 @@ cl::opt<bool> TimeRewrite("time-rewrite", cl::opt<bool> UseOldText( "use-old-text", - cl::desc("re-use space in old .text if possible (relocation mode)"), + cl::desc("reuse space in old .text if possible (relocation mode)"), cl::cat(BoltCategory)); cl::opt<bool> UpdateDebugSections( diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index 672b042..de89630 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -24,7 +24,7 @@ {} #endif -// Function constains trampoline to _start, +// Function contains trampoline to _start, // so we can resume regular execution of the function that we hooked. extern void __bolt_hugify_start_program(); diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index 1f54a50..f586db2 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -214,7 +214,7 @@ private: /// __bolt_instr_setup, our initialization routine. BumpPtrAllocator *GlobalAlloc; -// Base address which we substract from recorded PC values when searching for +// Base address which we subtract from recorded PC values when searching for // indirect call description entries. Needed because indCall descriptions are // mapped read-only and contain static addresses. Initialized in // __bolt_instr_setup. @@ -261,7 +261,7 @@ struct SimpleHashTableEntryBase { // Currently we have to do it the ugly way because // we want every message to be printed atomically via a single call to // __write. If we use reportNumber() and others nultiple times, we'll get - // garbage in mulithreaded environment + // garbage in multithreaded environment char Buf[BufSize]; char *Ptr = Buf; Ptr = intToStr(Ptr, __getpid(), 10); @@ -1585,7 +1585,7 @@ __bolt_instr_data_dump(int FD, const char *LibPath = nullptr, /// at user-specified intervals void watchProcess() { timespec ts, rem; - uint64_t Ellapsed = 0ull; + uint64_t Elapsed = 0ull; int FD = openProfile(); uint64_t ppid; if (__bolt_instr_wait_forks) { @@ -1615,10 +1615,10 @@ void watchProcess() { break; } - if (++Ellapsed < __bolt_instr_sleep_time) + if (++Elapsed < __bolt_instr_sleep_time) continue; - Ellapsed = 0; + Elapsed = 0; __bolt_instr_data_dump(FD); if (__bolt_instr_no_counters_clear == false) __bolt_instr_clear_counters(); diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h index 77c9cfc..b1d04f9 100644 --- a/bolt/runtime/sys_aarch64.h +++ b/bolt/runtime/sys_aarch64.h @@ -41,7 +41,7 @@ // Anonymous namespace covering everything but our library entry point namespace { -// Get the difference between runtime addrress of .text section and +// Get the difference between runtime address of .text section and // static address in section header table. Can be extracted from arbitrary // pc value recorded at runtime to get the corresponding static address, which // in turn can be used to search for indirect call description.
Needed because diff --git a/bolt/runtime/sys_riscv64.h b/bolt/runtime/sys_riscv64.h index 00a21e4..442fa2e0 100644 --- a/bolt/runtime/sys_riscv64.h +++ b/bolt/runtime/sys_riscv64.h @@ -105,7 +105,7 @@ // Anonymous namespace covering everything but our library entry point namespace { -// Get the difference between runtime addrress of .text section and +// Get the difference between runtime address of .text section and // static address in section header table. Can be extracted from arbitrary // pc value recorded at runtime to get the corresponding static address, which // in turn can be used to search for indirect call description. Needed because diff --git a/bolt/runtime/sys_x86_64.h b/bolt/runtime/sys_x86_64.h index ca2c693..933e939 100644 --- a/bolt/runtime/sys_x86_64.h +++ b/bolt/runtime/sys_x86_64.h @@ -40,7 +40,7 @@ namespace { -// Get the difference between runtime addrress of .text section and +// Get the difference between runtime address of .text section and // static address in section header table. Can be extracted from arbitrary // pc value recorded at runtime to get the corresponding static address, which // in turn can be used to search for indirect call description. Needed because @@ -171,8 +171,9 @@ uint64_t __exit(uint64_t code) { #if !defined(__APPLE__) // We use a stack-allocated buffer for string manipulation in many pieces of // this code, including the code that prints each line of the fdata file. This -// buffer needs to accomodate large function names, but shouldn't be arbitrarily -// large (dynamically allocated) for simplicity of our memory space usage. +// buffer needs to accommodate large function names, but shouldn't be +// arbitrarily large (dynamically allocated) for simplicity of our memory space +// usage. // Declare some syscall wrappers we use throughout this code to avoid linking // against system libc. diff --git a/bolt/test/AArch64/constant-island-alignment.s b/bolt/test/AArch64/constant-island-alignment.s index 957c470..99fe733 100644 --- a/bolt/test/AArch64/constant-island-alignment.s +++ b/bolt/test/AArch64/constant-island-alignment.s @@ -3,7 +3,7 @@ # RUN: split-file %s %t // For the first test case, in case the nop before .Lci will be removed -// the pointer to exit function won't be alinged and the test will fail. +// the pointer to exit function won't be aligned and the test will fail. # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ # RUN: %t/xword_align.s -o %t_xa.o diff --git a/bolt/test/AArch64/ifunc.test b/bolt/test/AArch64/ifunc.test index 3da42c6..15ecc35 100644 --- a/bolt/test/AArch64/ifunc.test +++ b/bolt/test/AArch64/ifunc.test @@ -9,7 +9,7 @@ // RUN: FileCheck --check-prefix=REL_CHECK %s // Non-pie static executable doesn't generate PT_DYNAMIC, check relocation -// is readed successfully and IPLT trampoline has been identified by bolt. +// is read successfully and IPLT trampoline has been identified by bolt. // RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \ // RUN: -o %t.O3_nopie.exe -Wl,-q // RUN: llvm-readelf -l %t.O3_nopie.exe | \ diff --git a/bolt/test/X86/cdsplit-call-scale.s b/bolt/test/X86/cdsplit-call-scale.s index 66f3003..caa11b6 100644 --- a/bolt/test/X86/cdsplit-call-scale.s +++ b/bolt/test/X86/cdsplit-call-scale.s @@ -1,8 +1,8 @@ ## Test the control of aggressiveness of 3-way splitting by -call-scale. -## When -call-scale=0.0, the tested function is 2-way splitted. 
-## When -call-scale=1.0, the tested function is 3-way splitted with 5 blocks +## When -call-scale=0.0, the tested function is 2-way split. +## When -call-scale=1.0, the tested function is 3-way split with 5 blocks ## in warm because of the increased benefit of shortening the call edges. -## When -call-scale=1000.0, the tested function is still 3-way splitted with +## When -call-scale=1000.0, the tested function is still 3-way split with ## 5 blocks in warm because cdsplit does not allow hot-warm splitting to break ## a fall through branch from a basic block to its most likely successor. diff --git a/bolt/test/X86/dwarf5-two-cu-str-offset-table.test b/bolt/test/X86/dwarf5-two-cu-str-offset-table.test index e59664e..488635b 100644 --- a/bolt/test/X86/dwarf5-two-cu-str-offset-table.test +++ b/bolt/test/X86/dwarf5-two-cu-str-offset-table.test @@ -8,7 +8,7 @@ # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt >> %t.txt # RUN: cat %t.txt | FileCheck --check-prefix=CHECK %s -## This test checks we correclty re-renerate .debug_str_offsets. +## This test checks we correctly re-generate .debug_str_offsets. # CHECK: .debug_str_offsets contents # CHECK-NEXT: 0x00000000: Contribution size = 52, Format = DWARF32, Version = 5 diff --git a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test index dc6255f..0cb62ed 100644 --- a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test +++ b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test @@ -7,7 +7,7 @@ # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe | FileCheck -check-prefix=PRE-BOLT %s # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt | FileCheck -check-prefix=POST-BOLT %s -## This test checks we correclty re-generate .debug_str_offsets when there are type units that have an offset not shared with CU. +## This test checks we correctly re-generate .debug_str_offsets when there are type units that have an offset not shared with CU. # PRE-BOLT: .debug_str_offsets contents # PRE-BOLT-NEXT: Contribution size = 24, Format = DWARF32, Version = 5 diff --git a/bolt/test/X86/jt-symbol-disambiguation-3.s b/bolt/test/X86/jt-symbol-disambiguation-3.s index 22b34ce..c06fd3b 100644 --- a/bolt/test/X86/jt-symbol-disambiguation-3.s +++ b/bolt/test/X86/jt-symbol-disambiguation-3.s @@ -1,6 +1,6 @@ ## In this test case, we reproduce the behavior seen in gcc where the ## base address of a jump table is decremented by some number and ends up -## at the exact addess of a jump table from another function. After +## at the exact address of a jump table from another function. After ## linking, the instruction references another jump table and that ## confuses BOLT. ## We repro here the following issue: @@ -28,7 +28,7 @@ # ---- # Func foo contains a jump table whose start is colocated with a # jump table reference in another function. However, the other function -# does not use the first entries of it and is merely doing arithmetics +# does not use the first entries of it and is merely doing arithmetic # to save the creation of unused first entries. # ---- .globl foo diff --git a/bolt/test/X86/split-landing-pad.s b/bolt/test/X86/split-landing-pad.s index 681f14f..149193d 100644 --- a/bolt/test/X86/split-landing-pad.s +++ b/bolt/test/X86/split-landing-pad.s @@ -1,5 +1,5 @@ ## This test reproduces the case where C++ exception handling is used and split
In particular, function foo is splitted +## function optimization is enabled. In particular, function foo is split ## to two fragments: ## foo: contains 2 try blocks, which invokes bar to throw exception ## foo.cold.1: contains 2 corresponding catch blocks (landing pad) diff --git a/bolt/test/runtime/X86/asm-dump.c b/bolt/test/runtime/X86/asm-dump.c index 7656fda..fa0de9b 100644 --- a/bolt/test/runtime/X86/asm-dump.c +++ b/bolt/test/runtime/X86/asm-dump.c @@ -30,7 +30,7 @@ * Reconstruct fdata * RUN: link_fdata %t/main.s %t.o %t.fdata.reconst * - * XXX: reenable once dumping data is supported + * XXX: re-enable once dumping data is supported * Check if reoptimized file produces the same results * dontrun: %t.exe.reopt > %t.result.reopt * dontrun: cmp %t.result %t.result.reopt diff --git a/bolt/test/runtime/wait_file.sh b/bolt/test/runtime/wait_file.sh index 42d4c5b..7346476 100644 --- a/bolt/test/runtime/wait_file.sh +++ b/bolt/test/runtime/wait_file.sh @@ -12,7 +12,7 @@ check_file() { fuser -s "$file" local ret=$? - if [ $ret -eq 1 ]; then # noone has file open + if [ $ret -eq 1 ]; then # no one has file open return 0 fi if [ $ret -eq 0 ]; then # file open by some processes diff --git a/bolt/utils/bughunter.sh b/bolt/utils/bughunter.sh index c5dddc4..d5ce059 100755 --- a/bolt/utils/bughunter.sh +++ b/bolt/utils/bughunter.sh @@ -28,7 +28,7 @@ # # TIMEOUT_OR_CMD - optional timeout or command on optimized binary command # if the value is a number with an optional trailing letter -# [smhd] it is considered a paramter to "timeout", +# [smhd] it is considered a parameter to "timeout", # otherwise it's a shell command that wraps the optimized # binary command. # diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index 799c64b..5d5388e 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -1040,8 +1040,8 @@ private: if (auto *S = N.get<Stmt>()) return refInStmt(S, Resolver); if (auto *NNSL = N.get<NestedNameSpecifierLoc>()) { - if (auto TL = NNSL->getAsTypeLoc()) - return refInTypeLoc(NNSL->getAsTypeLoc(), Resolver); + if (TypeLoc TL = NNSL->getAsTypeLoc()) + return refInTypeLoc(TL, Resolver); // (!) 'DeclRelation::Alias' ensures we do not lose namespace aliases. NestedNameSpecifierLoc Qualifier = NNSL->getAsNamespaceAndPrefix().Prefix; SourceLocation NameLoc = NNSL->getLocalBeginLoc(); diff --git a/clang/docs/PointerAuthentication.rst b/clang/docs/PointerAuthentication.rst index 96eb498..7e65f4b 100644 --- a/clang/docs/PointerAuthentication.rst +++ b/clang/docs/PointerAuthentication.rst @@ -592,6 +592,36 @@ The result value is never zero and always within range for both the This can be used in constant expressions. +``ptrauth_type_discriminator`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + ptrauth_type_discriminator(type) + +Compute the constant discriminator derived from the given type, as is computed +for automatically type diversified schemas. + +``type`` must be a type. The result has the type ``ptrauth_extra_data_t``. + +This can be used in constant expressions. + +``ptrauth_function_pointer_type_discriminator`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + ptrauth_function_pointer_type_discriminator(function_type) + +Compute the constant discriminator derived from the provided function type, for +use in contexts where the default function authentication schema. 
If function +pointer type diversity is enabled, this is equivalent to +`ptrauth_type_discriminator(function_type)`, if it is not enabled this is `0`. + +``function_type`` must be a function type. The result has the type ``ptrauth_extra_data_t``. + +This can be used in constant expressions. + ``ptrauth_strip`` ^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index db2b0f6..79ff821 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -191,6 +191,8 @@ C23 Feature Support - Added ``FLT_SNAN``, ``DBL_SNAN``, and ``LDBL_SNAN`` to Clang's ``<float.h>`` header in C23 and later modes. This implements `WG14 N2710 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2710.htm>`_. +- Fixed accepting as compatible unnamed tag types with the same fields within + the same translation unit but from different types. Non-comprehensive list of changes in this release ------------------------------------------------- @@ -435,6 +437,7 @@ Bug Fixes to C++ Support - Suppress ``-Wdeprecated-declarations`` in implicitly generated functions. (#GH147293) - Fix a crash when deleting a pointer to an incomplete array (#GH150359). - Fixed a mismatched lambda scope bug when propagating up ``consteval`` within nested lambdas. (#GH145776) +- Disallow immediate escalation in destructors. (#GH109096) - Fix an assertion failure when expression in assumption attribute (``[[assume(expr)]]``) creates temporary objects. - Fix the dynamic_cast to final class optimization to correctly handle diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index 1e351f3..83f2b18 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -1301,46 +1301,25 @@ struct OpenACCReductionRecipe { // AST), or in a separate collection when being semantically analyzed. llvm::ArrayRef<CombinerRecipe> CombinerRecipes; + bool isSet() const { return AllocaDecl; } + +private: + friend class OpenACCReductionClause; OpenACCReductionRecipe(VarDecl *A, llvm::ArrayRef<CombinerRecipe> Combiners) : AllocaDecl(A), CombinerRecipes(Combiners) {} - - bool isSet() const { return AllocaDecl; } }; // A version of the above that is used for semantic analysis, at a time before // the OpenACCReductionClause node has been created. This one has storage for // the CombinerRecipe, since Trailing storage for it doesn't exist yet. -struct OpenACCReductionRecipeWithStorage : OpenACCReductionRecipe { -private: - llvm::SmallVector<CombinerRecipe, 1> CombinerRecipeStorage; - -public: - OpenACCReductionRecipeWithStorage(VarDecl *A, - llvm::ArrayRef<CombinerRecipe> Combiners) - : OpenACCReductionRecipe(A, {}), CombinerRecipeStorage(Combiners) { - CombinerRecipes = CombinerRecipeStorage; - } +struct OpenACCReductionRecipeWithStorage { + VarDecl *AllocaDecl; + llvm::SmallVector<OpenACCReductionRecipe::CombinerRecipe, 1> CombinerRecipes; OpenACCReductionRecipeWithStorage( - const OpenACCReductionRecipeWithStorage &Other) - : OpenACCReductionRecipe(Other), - CombinerRecipeStorage(Other.CombinerRecipeStorage) { - CombinerRecipes = CombinerRecipeStorage; - } - - OpenACCReductionRecipeWithStorage(OpenACCReductionRecipeWithStorage &&Other) - : OpenACCReductionRecipe(std::move(Other)), - CombinerRecipeStorage(std::move(Other.CombinerRecipeStorage)) { - CombinerRecipes = CombinerRecipeStorage; - } - - // There is no real problem implementing these, we just have to make sure the - // array-ref this inherits from stays in sync. 
But as we don't need it at the - // moment, make sure we don't accidentially call these. - OpenACCReductionRecipeWithStorage & - operator=(OpenACCReductionRecipeWithStorage &&) = delete; - OpenACCReductionRecipeWithStorage & - operator=(const OpenACCReductionRecipeWithStorage &) = delete; + VarDecl *A, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> Combiners) + : AllocaDecl(A), CombinerRecipes(Combiners) {} static OpenACCReductionRecipeWithStorage Empty() { return OpenACCReductionRecipeWithStorage(/*AllocaDecl=*/nullptr, {}); diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b197d81..006a453 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -110,19 +110,20 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "sse3" in { - foreach Op = ["addsub", "hadd", "hsub"] in { + foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; } } - let Features = "ssse3" in { - foreach Op = ["phadd", "phsub"] in { - def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + foreach Op = ["hadd", "hsub"] in { + def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; } + } + let Features = "ssse3" in { def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; @@ -137,7 +138,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { // AVX let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { - foreach Op = ["addsub", "hadd", "hsub", "max", "min"] in { + foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } @@ -316,6 +317,14 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">; } +let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + foreach Op = ["phadd", "phsub"] in { + def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + } +} + let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; @@ -515,6 +524,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vinsertf128_pd256 : X86Builtin<"_Vector<4, 
double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; + + foreach Op = ["hadd", "hsub"] in { + def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; + } } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { @@ -592,12 +606,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; - def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; - def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; - def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; @@ -666,6 +675,13 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; + def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; @@ -2121,24 +2137,18 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { def movdqa64store256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512ifma", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512ifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">; def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">; } -let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">; -} - -let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">; } -let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">; def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">; } diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ceb16174..ea73ed9 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1211,6 +1211,25 @@ public: TiedOperand = N; // Don't copy Name or constraint string. } + + // For output operand constraints, the target can set bounds to indicate + // that the result value is guaranteed to fall within a certain range. + // This will cause corresponding assertions to be emitted that will allow + // for potential optimization based on that guarantee. + // + // NOTE: This re-uses the `ImmRange` fields to store the range, which are + // otherwise unused for constraint types used for output operands. + void setOutputOperandBounds(unsigned Min, unsigned Max) { + ImmRange.Min = Min; + ImmRange.Max = Max; + ImmRange.isConstrained = true; + } + std::optional<std::pair<unsigned, unsigned>> + getOutputOperandBounds() const { + return ImmRange.isConstrained + ? std::make_pair(ImmRange.Min, ImmRange.Max) + : std::optional<std::pair<unsigned, unsigned>>(); + } }; /// Validate register name used for global register variables.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 4c15d9e..baab156 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4329,7 +4329,7 @@ def CIR_AllocExceptionOp : CIR_Op<"alloc.exception"> { // Atomic operations //===----------------------------------------------------------------------===// -def CIR_AtomicXchg : CIR_Op<"atomic.xchg", [ +def CIR_AtomicXchgOp : CIR_Op<"atomic.xchg", [ AllTypesMatch<["result", "val"]>, TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'", "ptr", "val", "mlir::cast<cir::PointerType>($_self).getPointee()"> @@ -4347,9 +4347,7 @@ def CIR_AtomicXchg : CIR_Op<"atomic.xchg", [ Example: ```mlir - %res = cir.atomic.xchg(%ptr : !cir.ptr<!u64i>, - %val : !u64i, - seq_cst) : !u64i + %res = cir.atomic.xchg seq_cst %ptr, %val : !cir.ptr<!u64i> -> !u64i ``` }]; @@ -4364,12 +4362,16 @@ def CIR_AtomicXchg : CIR_Op<"atomic.xchg", [ let assemblyFormat = [{ $mem_order (`volatile` $is_volatile^)? $ptr `,` $val - `:` qualified(type($ptr)) `->` type($result) attr-dict + `:` functional-type(operands, results) attr-dict }]; } -def CIR_AtomicCmpXchg : CIR_Op<"atomic.cmpxchg", [ - AllTypesMatch<["old", "expected", "desired"]> +def CIR_AtomicCmpXchgOp : CIR_Op<"atomic.cmpxchg", [ + AllTypesMatch<["old", "expected", "desired"]>, + TypesMatchWith<"type of 'expected' must match the pointee type of 'ptr'", + "ptr", "expected", "mlir::cast<cir::PointerType>($_self).getPointee()">, + TypesMatchWith<"type of 'desired' must match the pointee type of 'ptr'", + "ptr", "desired", "mlir::cast<cir::PointerType>($_self).getPointee()"> ]> { let summary = "Atomic compare and exchange"; let description = [{ @@ -4402,12 +4404,9 @@ def CIR_AtomicCmpXchg : CIR_Op<"atomic.cmpxchg", [ Example: ```mlir - %old, %success = cir.atomic.cmpxchg(%ptr : !cir.ptr<!u64i>, - %expected : !u64i, - %desired : !u64i, - success = seq_cst, - failure = seq_cst) weak - : (!u64i, !cir.bool) + %old, %success = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) + %ptr, %expected, %desired + : (!cir.ptr<!u64i>, !u64i, !u64i) -> (!u64i, !cir.bool) ``` }]; let results = (outs CIR_AnyType:$old, CIR_BoolType:$success); @@ -4421,20 +4420,13 @@ def CIR_AtomicCmpXchg : CIR_Op<"atomic.cmpxchg", [ UnitAttr:$is_volatile); let assemblyFormat = [{ - `(` - $ptr `:` qualified(type($ptr)) `,` - $expected `:` type($expected) `,` - $desired `:` type($desired) `,` - `success` `=` $succ_order `,` - `failure` `=` $fail_order - `)` - (`align` `(` $alignment^ `)`)? (`weak` $weak^)? + `success` `(` $succ_order `)` `failure` `(` $fail_order `)` + $ptr `,` $expected `,` $desired + (`align` `(` $alignment^ `)`)? (`volatile` $is_volatile^)? - `:` `(` type($old) `,` type($success) `)` attr-dict + `:` functional-type(operands, results) attr-dict }]; - - let hasVerifier = 1; } #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/include/clang/Sema/Template.h b/clang/include/clang/Sema/Template.h index 60c7d27..e963439b 100644 --- a/clang/include/clang/Sema/Template.h +++ b/clang/include/clang/Sema/Template.h @@ -205,8 +205,8 @@ enum class TemplateSubstitutionKind : char { /// Add a new outmost level to the multi-level template argument /// list. - /// A 'Final' substitution means that Subst* nodes won't be built - /// for the replacements. + /// A 'Final' substitution means that these Args don't need to be + /// resugared later. 
void addOuterTemplateArguments(Decl *AssociatedDecl, ArgList Args, bool Final) { assert(!NumRetainedOuterLevels && diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a8b41ba..3603e9cd 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11581,6 +11581,12 @@ QualType ASTContext::mergeTagDefinitions(QualType LHS, QualType RHS) { if (LangOpts.CPlusPlus || !LangOpts.C23) return {}; + // Nameless tags are comparable only within outer definitions. At the top + // level they are not comparable. + const TagDecl *LTagD = LHS->castAsTagDecl(), *RTagD = RHS->castAsTagDecl(); + if (!LTagD->getIdentifier() || !RTagD->getIdentifier()) + return {}; + // C23, on the other hand, requires the members to be "the same enough", so // we use a structural equivalence check. StructuralEquivalenceContext::NonEquivalentDeclSet NonEquivalentDecls; diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 1557346..b17cd6f 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1763,19 +1763,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, // another anonymous structure or union, respectively, if their members // fulfill the preceding requirements. ... Otherwise, the structure, union, // or enumerated types are incompatible. - - // Note: "the same tag" refers to the identifier for the structure; two - // structures without names are not compatible within a TU. In C23, if either - // declaration has no name, they're not equivalent. However, the paragraph - // after the bulleted list goes on to talk about compatibility of anonymous - // structure and union members, so this prohibition only applies to top-level - // declarations; if either declaration is not a member, they cannot be - // compatible. - if (Context.LangOpts.C23 && (!D1->getIdentifier() || !D2->getIdentifier()) && - (!D1->getDeclContext()->isRecord() || !D2->getDeclContext()->isRecord())) - return false; - - // Otherwise, check the names for equivalence. 
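To see what the relocated nameless-tag check does, here is an illustrative C sketch (not a test from this patch) of the rule it implements: under C23's tag-compatibility semantics, `mergeTagDefinitions` now bails out for tagless top-level types before the structural-equivalence comparison, while anonymous members nested inside named records still get compared structurally.

```c
/* C23 sketch: a tagless top-level struct type is unique, so a second
 * tagless declaration of the same object does not match it; a named tag
 * with the same shape may be redefined compatibly, and its anonymous
 * member participates in the structural comparison.
 * Requires a compiler implementing C23's tag rules (N3037). */
struct pair { struct { int lo; int hi; }; };  /* anonymous struct member   */
struct pair { struct { int lo; int hi; }; };  /* OK in C23: same tag/shape */

extern struct { int v; } x;     /* tagless: a unique, one-off type ...     */
/* extern struct { int v; } x;     ... so repeating it would not match     */
```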
if (!NameIsStructurallyEquivalent(*D1, *D2)) return false; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 84c5ecc..2d3cb6a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2587,6 +2587,82 @@ static bool interp__builtin_ia32_pmul( return true; } +static bool interp_builtin_horizontal_int_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + PrimType ElemT = *S.getContext().classify(VT->getElementType()); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + unsigned NumElts = VT->getNumElements(); + unsigned EltBits = S.getASTContext().getIntWidth(VT->getElementType()); + unsigned EltsPerLane = 128 / EltBits; + unsigned Lanes = NumElts * EltBits / 128; + unsigned DestIndex = 0; + + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + unsigned LaneStart = Lane * EltsPerLane; + for (unsigned I = 0; I < EltsPerLane; I += 2) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt(); + APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt(); + APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned); + Dst.elem<T>(DestIndex++) = static_cast<T>(ResL); + }); + } + + for (unsigned I = 0; I < EltsPerLane; I += 2) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt(); + APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt(); + APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned); + Dst.elem<T>(DestIndex++) = static_cast<T>(ResR); + }); + } + } + Dst.initializeAllElements(); + return true; +} + +static bool interp_builtin_horizontal_fp_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APFloat(const APFloat &, const APFloat &, + llvm::RoundingMode)> + Fn) { + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + + unsigned NumElts = VT->getNumElements(); + unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType()); + unsigned NumLanes = NumElts * EltBits / 128; + unsigned NumElemsPerLane = NumElts / NumLanes; + unsigned HalfElemsPerLane = NumElemsPerLane / 2; + + for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) { + using T = PrimConv<PT_Float>::T; + for (unsigned E = 0; E != HalfElemsPerLane; ++E) { + APFloat Elem1 = LHS.elem<T>(L + (2 * E) + 0).getAPFloat(); + APFloat Elem2 = LHS.elem<T>(L + (2 * E) + 1).getAPFloat(); + Dst.elem<T>(L + E) = static_cast<T>(Fn(Elem1, Elem2, RM)); + } + for (unsigned E = 0; E != HalfElemsPerLane; ++E) { + APFloat Elem1 = RHS.elem<T>(L + (2 * E) + 0).getAPFloat(); + APFloat Elem2 = RHS.elem<T>(L + (2 * E) + 1).getAPFloat(); + Dst.elem<T>(L + E + HalfElemsPerLane) = + static_cast<T>(Fn(Elem1, Elem2, RM)); + } + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const APFloat &, @@ -3665,6 +3741,53 @@ bool InterpretBuiltin(InterpState &S, 
CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_elementwise_min: return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); }); + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; }); + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddpd256: + case clang::X86::BI__builtin_ia32_haddps256: + return interp_builtin_horizontal_fp_binop( + S, OpPC, Call, + [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { + APFloat F = LHS; + F.add(RHS, RM); + return F; + }); + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubpd256: + case clang::X86::BI__builtin_ia32_hsubps256: + return interp_builtin_horizontal_fp_binop( + S, OpPC, Call, + [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { + APFloat F = LHS; + F.subtract(RHS, RM); + return F; + }); + case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: case clang::X86::BI__builtin_ia32_pmuldq512: @@ -3695,6 +3818,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case X86::BI__builtin_ia32_vpmadd52luq128: + case X86::BI__builtin_ia32_vpmadd52luq256: + case X86::BI__builtin_ia32_vpmadd52luq512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) { + return A + (B.trunc(52) * C.trunc(52)).zext(64); + }); + case X86::BI__builtin_ia32_vpmadd52huq128: + case X86::BI__builtin_ia32_vpmadd52huq256: + case X86::BI__builtin_ia32_vpmadd52huq512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) { + return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64); + }); + case X86::BI__builtin_ia32_vpshldd128: case X86::BI__builtin_ia32_vpshldd256: case X86::BI__builtin_ia32_vpshldd512: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c734155..69cbf6e 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3316,6 +3316,10 @@ bool FunctionDecl::isImmediateEscalating() const { CD && CD->isInheritingConstructor()) return CD->getInheritedConstructor().getConstructor(); + // Destructors are not immediate escalating. + if (isa<CXXDestructorDecl>(this)) + return false; + // - a function that results from the instantiation of a templated entity // defined with the constexpr specifier. 
TemplatedKind TK = getTemplatedKind(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index dfdfef2..51c0382 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11974,6 +11974,54 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_vpmadd52luq128: + case X86::BI__builtin_ia32_vpmadd52luq256: + case X86::BI__builtin_ia32_vpmadd52luq512: { + APValue A, B, C; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B) || + !EvaluateAsRValue(Info, E->getArg(2), C)) + return false; + + unsigned ALen = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(ALen); + + for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) { + APInt AElt = A.getVectorElt(EltNum).getInt(); + APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52); + APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52); + APSInt ResElt(AElt + (BElt * CElt).zext(64), false); + ResultElements.push_back(APValue(ResElt)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_vpmadd52huq128: + case X86::BI__builtin_ia32_vpmadd52huq256: + case X86::BI__builtin_ia32_vpmadd52huq512: { + APValue A, B, C; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B) || + !EvaluateAsRValue(Info, E->getArg(2), C)) + return false; + + unsigned ALen = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(ALen); + + for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) { + APInt AElt = A.getVectorElt(EltNum).getInt(); + APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52); + APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52); + APSInt ResElt(AElt + llvm::APIntOps::mulhu(BElt, CElt).zext(64), false); + ResultElements.push_back(APValue(ResElt)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: case clang::X86::BI__builtin_ia32_vprotqi: @@ -12381,6 +12429,169 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: + + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + + unsigned NumElts = SourceLHS.getVectorLength(); + unsigned EltBits = Info.Ctx.getIntWidth(DestEltTy); + unsigned EltsPerLane = 128 / EltBits; + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned LaneStart = 0; LaneStart != NumElts; + LaneStart += 
EltsPerLane) { + for (unsigned I = 0; I != EltsPerLane; I += 2) { + APSInt LHSA = SourceLHS.getVectorElt(LaneStart + I).getInt(); + APSInt LHSB = SourceLHS.getVectorElt(LaneStart + I + 1).getInt(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: { + APSInt Res(LHSA + LHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: { + APSInt Res(LHSA.sadd_sat(LHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: { + APSInt Res(LHSA - LHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APSInt Res(LHSA.ssub_sat(LHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + } + } + for (unsigned I = 0; I != EltsPerLane; I += 2) { + APSInt RHSA = SourceRHS.getVectorElt(LaneStart + I).getInt(); + APSInt RHSB = SourceRHS.getVectorElt(LaneStart + I + 1).getInt(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: { + APSInt Res(RHSA + RHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: { + APSInt Res(RHSA.sadd_sat(RHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: { + APSInt Res(RHSA - RHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APSInt Res(RHSA.ssub_sat(RHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + } + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElts = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElts); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned EltBits = Info.Ctx.getTypeSize(DestEltTy); + unsigned NumLanes = NumElts * EltBits / 128; + unsigned NumElemsPerLane = NumElts / NumLanes; + unsigned HalfElemsPerLane = NumElemsPerLane / 2; + + for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) { + for (unsigned I = 0; I != HalfElemsPerLane; 
++I) { + APFloat LHSA = SourceLHS.getVectorElt(L + (2 * I) + 0).getFloat(); + APFloat LHSB = SourceLHS.getVectorElt(L + (2 * I) + 1).getFloat(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + LHSA.add(LHSB, RM); + break; + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: + LHSA.subtract(LHSB, RM); + break; + } + ResultElements.push_back(APValue(LHSA)); + } + for (unsigned I = 0; I != HalfElemsPerLane; ++I) { + APFloat RHSA = SourceRHS.getVectorElt(L + (2 * I) + 0).getFloat(); + APFloat RHSB = SourceRHS.getVectorElt(L + (2 * I) + 1).getFloat(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + RHSA.add(RHSB, RM); + break; + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: + RHSA.subtract(RHSB, RM); + break; + } + ResultElements.push_back(APValue(RHSA)); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index 17c6bec..142c932 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -509,7 +509,7 @@ OpenACCReductionClause *OpenACCReductionClause::Create( ArrayRef<OpenACCReductionRecipeWithStorage> Recipes, SourceLocation EndLoc) { size_t NumCombiners = llvm::accumulate( - Recipes, 0, [](size_t Num, const OpenACCReductionRecipe &R) { + Recipes, 0, [](size_t Num, const OpenACCReductionRecipeWithStorage &R) { return Num + R.CombinerRecipes.size(); }); diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 485308f..9b68de1 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -9,9 +9,11 @@ #include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/TimeProfiler.h" namespace clang::lifetimes::internal { +using llvm::isa_and_present; static bool isGslPointerType(QualType QT) { if (const auto *RD = QT->getAsCXXRecordDecl()) { @@ -108,7 +110,7 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { // Specifically for conversion operators, // like `std::string_view p = std::string{};` if (isGslPointerType(MCE->getType()) && - isa<CXXConversionDecl>(MCE->getCalleeDecl())) { + isa_and_present<CXXConversionDecl>(MCE->getCalleeDecl())) { // The argument is the implicit object itself. 
      handleFunctionCall(MCE, MCE->getMethodDecl(),
                         {MCE->getImplicitObjectArgument()},
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 9e03a08..18641a9 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1568,6 +1568,7 @@ bool AArch64TargetInfo::validateAsmConstraint(
     if (const unsigned Len = matchAsmCCConstraint(Name)) {
       Name += Len - 1;
       Info.setAllowsRegister();
+      Info.setOutputOperandBounds(0, 2);
       return true;
     }
   }
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 13b8623..30f846c 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -99,6 +99,16 @@ bool SystemZTargetInfo::validateAsmConstraint(
   case 'T': // Likewise, plus an index
     Info.setAllowsMemory();
     return true;
+  case '@':
+    // CC condition changes.
+    if (StringRef(Name) == "@cc") {
+      Name += 2;
+      Info.setAllowsRegister();
+      // SystemZ has a 2-bit CC, hence the interval [0, 4).
+      Info.setOutputOperandBounds(0, 4);
+      return true;
+    }
+    return false;
   }
 }
@@ -161,6 +171,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
 void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
+  // Inline assembly supports SystemZ flag outputs.
+  Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
+
   Builder.defineMacro("__s390__");
   Builder.defineMacro("__s390x__");
   Builder.defineMacro("__zarch__");
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index dc2185e..4e15d5a 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -136,6 +136,12 @@ public:
   std::string convertConstraint(const char *&Constraint) const override {
     switch (Constraint[0]) {
+    case '@': // Flag output operand.
+      if (llvm::StringRef(Constraint) == "@cc") {
+        Constraint += 2;
+        return std::string("{@cc}");
+      }
+      break;
     case 'p': // Keep 'p' constraint.
return std::string("p"); case 'Z': diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 6eb4db5..187815c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1516,6 +1516,7 @@ bool X86TargetInfo::validateAsmConstraint( if (auto Len = matchAsmCCConstraint(Name)) { Name += Len - 1; Info.setAllowsRegister(); + Info.setOutputOperandBounds(0, 2); return true; } return false; diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 0f4d6d2..a9983f8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -255,7 +255,7 @@ static void emitAtomicCmpXchg(CIRGenFunction &cgf, AtomicExpr *e, bool isWeak, mlir::Value expected = builder.createLoad(loc, val1); mlir::Value desired = builder.createLoad(loc, val2); - auto cmpxchg = cir::AtomicCmpXchg::create( + auto cmpxchg = cir::AtomicCmpXchgOp::create( builder, loc, expected.getType(), builder.getBoolTy(), ptr.getPointer(), expected, desired, cir::MemOrderAttr::get(&cgf.getMLIRContext(), successOrder), @@ -404,7 +404,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: - opName = cir::AtomicXchg::getOperationName(); + opName = cir::AtomicXchgOp::getOperationName(); break; case AtomicExpr::AO__opencl_atomic_init: diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 3d86f71..ce4ae7e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -1005,7 +1005,7 @@ public: /*temporary=*/nullptr, OpenACCReductionOperator::Invalid, Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - privateOp); + privateOp, /*reductionCombinerRecipes=*/{}); // TODO: OpenACC: The dialect is going to change in the near future to // have these be on a different operation, so when that changes, we // probably need to change these here. @@ -1046,7 +1046,7 @@ public: OpenACCReductionOperator::Invalid, Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - firstPrivateOp); + firstPrivateOp, /*reductionCombinerRecipe=*/{}); // TODO: OpenACC: The dialect is going to change in the near future to // have these be on a different operation, so when that changes, we @@ -1088,7 +1088,7 @@ public: /*temporary=*/nullptr, clause.getReductionOp(), Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - reductionOp); + reductionOp, varRecipe.CombinerRecipes); operation.addReduction(builder.getContext(), reductionOp, recipe); } diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index 24a5fc2..ce14aa8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -527,16 +527,142 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( // doesn't restore it aftewards. 
 void OpenACCRecipeBuilderBase::createReductionRecipeCombiner(
     mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
-    mlir::acc::ReductionRecipeOp recipe, size_t numBounds) {
+    mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType,
+    llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes) {
   mlir::Block *block =
       createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc,
                         numBounds, /*isInit=*/false);
   builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
   CIRGenFunction::LexicalScope ls(cgf, loc, block);

-  mlir::BlockArgument lhsArg = block->getArgument(0);
+  mlir::Value lhsArg = block->getArgument(0);
+  mlir::Value rhsArg = block->getArgument(1);
+  llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+      block->getArguments().drop_front(2);
+
+  if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) {
+    cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated");
+    mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
+    return;
+  }
+
+  // Apply the bounds so that we can get our bounds emitted correctly.
+  for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+    std::tie(lhsArg, rhsArg) =
+        createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false);
+
+  // Emitter for when we know this isn't a struct or an array that we have to
+  // loop through. This should work for the 'field' once the get-element call
+  // has been made.
+  auto emitSingleCombiner =
+      [&](mlir::Value lhsArg, mlir::Value rhsArg,
+          const OpenACCReductionRecipe::CombinerRecipe &combiner) {
+        mlir::Type elementTy =
+            mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee();
+        CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS};
+        cgf.setAddrOfLocalVar(
+            combiner.LHS, Address{lhsArg, elementTy,
+                                  cgf.getContext().getDeclAlign(combiner.LHS)});
+        CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS};
+        cgf.setAddrOfLocalVar(
+            combiner.RHS, Address{rhsArg, elementTy,
+                                  cgf.getContext().getDeclAlign(combiner.RHS)});
+
+        [[maybe_unused]] mlir::LogicalResult stmtRes =
+            cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true);
+      };
+
+  // Emitter for when we know this is either a non-array or an element of an
+  // array (which also shouldn't be an array type?). This function should
+  // generate the combiner code for an entire 'array-element'/non-array,
+  // including diving into each element of a struct (if necessary).
+  auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) {
+    assert(!ty->isArrayType() && "Array type shouldn't get here");
+    if (const auto *rd = ty->getAsRecordDecl()) {
+      if (combinerRecipes.size() == 1 &&
+          cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) {
+        // If this is a 'top level' operator on the type we can just emit this
+        // as a simple one.
+        emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
+      } else {
+        // Otherwise, we have to handle each individual field after a
+        // get-element.
+ for (const auto &[field, combiner] : + llvm::zip_equal(rd->fields(), combinerRecipes)) { + mlir::Type fieldType = cgf.convertType(field->getType()); + auto fieldPtr = cir::PointerType::get(fieldType); + + mlir::Value lhsField = builder.createGetMember( + loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex()); + mlir::Value rhsField = builder.createGetMember( + loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex()); + + emitSingleCombiner(lhsField, rhsField, combiner); + } + } + + } else { + // if this is a single-thing (because we should know this isn't an array, + // as Sema wouldn't let us get here), we can just do a normal emit call. + emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); + } + }; + + if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) { + // If we're in an array, we have to emit the combiner for each element of + // the array. + auto itrTy = mlir::cast<cir::IntType>(cgf.PtrDiffTy); + auto itrPtrTy = cir::PointerType::get(itrTy); + + mlir::Value zero = + builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), 0); + mlir::Value itr = + cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", + cgf.cgm.getSize(cgf.getPointerAlign())); + builder.CIRBaseBuilderTy::createStore(loc, zero, itr); + + builder.setInsertionPointAfter(builder.createFor( + loc, + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + mlir::Value arraySize = builder.getConstInt( + loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), cat->getZExtSize()); + auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr, + arraySize); + builder.createCondition(cmp); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + auto lhsElt = builder.getArrayElement( + loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr, + /*shouldDecay=*/true); + auto rhsElt = builder.getArrayElement( + loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr, + /*shouldDecay=*/true); + + emitCombiner(lhsElt, rhsElt, cat->getElementType()); + builder.createYield(loc); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(), + cir::UnaryOpKind::Inc, loadItr); + builder.CIRBaseBuilderTy::createStore(loc, inc, itr); + builder.createYield(loc); + })); - mlir::acc::YieldOp::create(builder, locEnd, lhsArg); + } else if (origType->isArrayType()) { + cgf.cgm.errorNYI(loc, + "OpenACC Reduction combiner non-constant array recipe"); + } else { + emitCombiner(lhsArg, rhsArg, origType); + } + + builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); + mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); } } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h index a5da744..745d424 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h @@ -64,10 +64,10 @@ protected: // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but // doesn't restore it aftewards. 
- void createReductionRecipeCombiner(mlir::Location loc, mlir::Location locEnd, - mlir::Value mainOp, - mlir::acc::ReductionRecipeOp recipe, - size_t numBounds); + void createReductionRecipeCombiner( + mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, + mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes); void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, @@ -169,7 +169,9 @@ public: const Expr *varRef, const VarDecl *varRecipe, const VarDecl *temporary, OpenACCReductionOperator reductionOp, DeclContext *dc, QualType origType, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, QualType baseType, - mlir::Value mainOp) { + mlir::Value mainOp, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> + reductionCombinerRecipes) { assert(!varRecipe->getType()->isSpecificBuiltinType( BuiltinType::ArraySection) && "array section shouldn't make it to recipe creation"); @@ -208,7 +210,8 @@ public: createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, origType, /*emitInitExpr=*/true); - createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds); + createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds, + origType, reductionCombinerRecipes); } else { static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>); createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index cfd48a2..5ba64dd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -536,7 +536,7 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) { mlir::Block *currBlock = builder.getBlock(); mlir::Block *labelBlock = currBlock; - if (!currBlock->empty()) { + if (!currBlock->empty() || currBlock->isEntryBlock()) { { mlir::OpBuilder::InsertionGuard guard(builder); labelBlock = builder.createBlock(builder.getBlock()->getParent()); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 12837d9..7af3dc1 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -2901,20 +2901,6 @@ mlir::LogicalResult cir::ThrowOp::verify() { } //===----------------------------------------------------------------------===// -// AtomicCmpXchg -//===----------------------------------------------------------------------===// - -LogicalResult cir::AtomicCmpXchg::verify() { - mlir::Type pointeeType = getPtr().getType().getPointee(); - - if (pointeeType != getExpected().getType() || - pointeeType != getDesired().getType()) - return emitOpError("ptr, expected and desired types must match"); - - return success(); -} - -//===----------------------------------------------------------------------===// // TypeInfoAttr //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index f0d73ac..3abba3d 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -694,8 +694,8 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) { llvm_unreachable("unknown memory order"); } -mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite( - 
cir::AtomicCmpXchg op, OpAdaptor adaptor, +mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite( + cir::AtomicCmpXchgOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { mlir::Value expected = adaptor.getExpected(); mlir::Value desired = adaptor.getDesired(); @@ -719,8 +719,8 @@ mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite( return mlir::success(); } -mlir::LogicalResult CIRToLLVMAtomicXchgLowering::matchAndRewrite( - cir::AtomicXchg op, OpAdaptor adaptor, +mlir::LogicalResult CIRToLLVMAtomicXchgOpLowering::matchAndRewrite( + cir::AtomicXchgOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { assert(!cir::MissingFeatures::atomicSyncScopeID()); mlir::LLVM::AtomicOrdering llvmOrder = getLLVMMemOrder(adaptor.getMemOrder()); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 92636f2..fdc1a11 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2674,7 +2674,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, const llvm::ArrayRef<LValue> ResultRegDests, const llvm::ArrayRef<QualType> ResultRegQualTys, const llvm::BitVector &ResultTypeRequiresCast, - const llvm::BitVector &ResultRegIsFlagReg) { + const std::vector<std::optional<std::pair<unsigned, unsigned>>> + &ResultBounds) { CGBuilderTy &Builder = CGF.Builder; CodeGenModule &CGM = CGF.CGM; llvm::LLVMContext &CTX = CGF.getLLVMContext(); @@ -2685,18 +2686,20 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, // ResultRegDests can be also populated by addReturnRegisterOutputs() above, // in which case its size may grow. assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); - assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); + assert(ResultBounds.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; llvm::Type *TruncTy = ResultTruncRegTypes[i]; - if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { - // Target must guarantee the Value `Tmp` here is lowered to a boolean - // value. - llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2); + if ((i < ResultBounds.size()) && ResultBounds[i].has_value()) { + const auto [LowerBound, UpperBound] = ResultBounds[i].value(); + // FIXME: Support for nonzero lower bounds not yet implemented. + assert(LowerBound == 0 && "Output operand lower bound is not zero."); + llvm::Constant *UpperBoundConst = + llvm::ConstantInt::get(Tmp->getType(), UpperBound); llvm::Value *IsBooleanValue = - Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two); + Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, UpperBoundConst); llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); Builder.CreateCall(FnAssume, IsBooleanValue); } @@ -2825,7 +2828,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Type *> ArgElemTypes; std::vector<llvm::Value*> Args; llvm::BitVector ResultTypeRequiresCast; - llvm::BitVector ResultRegIsFlagReg; + std::vector<std::optional<std::pair<unsigned, unsigned>>> ResultBounds; // Keep track of inout constraints. 
std::string InOutConstraints; @@ -2883,8 +2886,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultRegQualTys.push_back(QTy); ResultRegDests.push_back(Dest); - bool IsFlagReg = llvm::StringRef(OutputConstraint).starts_with("{@cc"); - ResultRegIsFlagReg.push_back(IsFlagReg); + ResultBounds.emplace_back(Info.getOutputOperandBounds()); llvm::Type *Ty = ConvertTypeForMem(QTy); const bool RequiresCast = Info.allowsRegister() && @@ -3231,7 +3233,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast, - ResultRegIsFlagReg); + ResultBounds); // If this is an asm goto with outputs, repeat EmitAsmStores, but with a // different insertion point; one for each indirect destination and with @@ -3242,7 +3244,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Builder.SetInsertPoint(Succ, --(Succ->end())); EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, - ResultTypeRequiresCast, ResultRegIsFlagReg); + ResultTypeRequiresCast, ResultBounds); } } } diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index c2956a1..cb3fc1c 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -41,8 +41,7 @@ static constexpr std::array<StringRef, 14> QtPropertyKeywords = { bool FormatToken::isQtProperty() const { assert(llvm::is_sorted(QtPropertyKeywords)); - return std::binary_search(QtPropertyKeywords.begin(), - QtPropertyKeywords.end(), TokenText); + return llvm::binary_search(QtPropertyKeywords, TokenText); } // Sorted common C++ non-keyword types. diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 4aaca2d..e150aa6 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -834,10 +834,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit @@ -866,10 +865,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit @@ -901,10 +899,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadds_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadds_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -937,10 +934,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit @@ -969,10 +965,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -1005,10 +1000,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a diff --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h index f01b322..625a8ff 100644 --- a/clang/lib/Headers/avx512ifmaintrin.h +++ b/clang/lib/Headers/avx512ifmaintrin.h @@ -15,54 +15,53 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ + __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512ifmavlintrin.h b/clang/lib/Headers/avx512ifmavlintrin.h index a72b561..c4449c7 100644 --- a/clang/lib/Headers/avx512ifmavlintrin.h +++ b/clang/lib/Headers/avx512ifmavlintrin.h @@ -8,13 +8,24 @@ *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H -#error "Never use <avx512ifmavlintrin.h> directly; include 
<immintrin.h> instead." +#error \ + "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __IFMAVLINTRIN_H #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,avx512vl"), \ @@ -24,6 +35,8 @@ __target__("avx512ifma,avx512vl"), \ __min_vector_width__(256))) +#endif + #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) @@ -41,70 +54,57 @@ (__v4di)(Z))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - 
(__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h index 5c782d2a..a2ef601 100644 --- a/clang/lib/Headers/avxifmaintrin.h +++ b/clang/lib/Headers/avxifmaintrin.h @@ -15,12 +15,21 @@ #define __AVXIFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(256))) +#endif // must vex-encoding diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 123fa79..696ec31 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -694,9 +694,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b) /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hadd_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } @@ -717,9 +716,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hadd_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } @@ -740,9 +738,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) /// odd-indexed elements of a vector of [4 x double]. 
/// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } @@ -763,9 +760,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index f0c9b2b..42bd343 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -83,9 +83,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hadd_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } @@ -106,9 +105,8 @@ _mm_hadd_ps(__m128 __a, __m128 __b) /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } @@ -168,9 +166,8 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_addsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } @@ -191,9 +188,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b) /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hadd_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } @@ -214,9 +210,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b) /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index f902ca1..ad28f06 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define ptrauth_type_discriminator(__type) \ __builtin_ptrauth_type_discriminator(__type) +/* Compute the constant discriminator used by Clang to sign pointers with the + given C function pointer type. + + A call to this function is an integer constant expression. */ +#if __has_feature(ptrauth_function_pointer_type_discrimination) +#define ptrauth_function_pointer_type_discriminator(__type) \ + __builtin_ptrauth_type_discriminator(__type) +#else +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) +#endif + /* Compute a signature for the given pair of pointer-sized values. The order of the arguments is significant. @@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; }) #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0) +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) #define ptrauth_sign_generic_data(__value, __data) \ ({ \ diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 3fc9f98..9d007c8 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -202,10 +202,9 @@ _mm_abs_epi32(__m128i __a) { /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -225,10 +224,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -248,11 +246,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -272,11 +269,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. 
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -299,10 +295,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadds_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadds_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -325,11 +320,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadds_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -349,10 +343,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -372,10 +365,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -395,11 +387,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -419,11 +410,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -446,10 +436,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -472,11 +461,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -556,10 +544,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) { /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, + __m128i __b) { + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 9cbd1bd..87dd682 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -606,10 +606,9 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( Constraint.mappingOccurenceList(); // The empty MLTAL situation should only occur when evaluating non-dependent // constraints. 
- if (!MLTAL.getNumSubstitutedLevels()) - MLTAL.addOuterTemplateArguments(TD, {}, /*Final=*/false); - SubstitutedOuterMost = - llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); + if (MLTAL.getNumSubstitutedLevels()) + SubstitutedOuterMost = + llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); unsigned Offset = 0; for (unsigned I = 0, MappedIndex = 0; I < Used.size(); I++) { TemplateArgument Arg; @@ -627,8 +626,10 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( if (Offset < SubstitutedOuterMost.size()) SubstitutedOuterMost.erase(SubstitutedOuterMost.begin() + Offset); - MLTAL.replaceOutermostTemplateArguments(TD, SubstitutedOuterMost); - return std::move(MLTAL); + MultiLevelTemplateArgumentList SubstitutedTemplateArgs; + SubstitutedTemplateArgs.addOuterTemplateArguments(TD, SubstitutedOuterMost, + /*Final=*/false); + return std::move(SubstitutedTemplateArgs); } ExprResult ConstraintSatisfactionChecker::EvaluateSlow( diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp index ead9781..17078e8 100644 --- a/clang/lib/Sema/SemaOpenACCClause.cpp +++ b/clang/lib/Sema/SemaOpenACCClause.cpp @@ -1924,7 +1924,7 @@ bool SemaOpenACC::CheckReductionVarType(Expr *VarExpr) { // off here. This will result in CurType being the actual 'type' of the // expression, which is what we are looking to check. QualType CurType = isa<ArraySectionExpr>(VarExpr) - ? ArraySectionExpr::getBaseOriginalType(VarExpr) + ? cast<ArraySectionExpr>(VarExpr)->getElementType() : VarExpr->getType(); // This can happen when we have a dependent type in an array element that the diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 85e3d20..73fd33a 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5727,7 +5727,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, Function->setDeclarationNameLoc(NameLocPointsToPattern()); EnterExpressionEvaluationContextForFunction EvalContext( - *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Function); Qualifiers ThisTypeQuals; CXXRecordDecl *ThisContext = nullptr; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 66cfccb..c1a5000 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -26,6 +26,7 @@ bool tryToFindPtrOrigin( const Expr *E, bool StopAtFirstRefCountedObj, std::function<bool(const clang::CXXRecordDecl *)> isSafePtr, std::function<bool(const clang::QualType)> isSafePtrType, + std::function<bool(const clang::Decl *)> isSafeGlobalDecl, std::function<bool(const clang::Expr *, bool)> callback) { while (E) { if (auto *DRE = dyn_cast<DeclRefExpr>(E)) { @@ -34,6 +35,8 @@ bool tryToFindPtrOrigin( auto IsImmortal = safeGetName(VD) == "NSApp"; if (VD->hasGlobalStorage() && (IsImmortal || QT.isConstQualified())) return callback(E, true); + if (VD->hasGlobalStorage() && isSafeGlobalDecl(VD)) + return callback(E, true); } } if (auto *tempExpr = dyn_cast<MaterializeTemporaryExpr>(E)) { @@ -71,9 +74,11 @@ bool tryToFindPtrOrigin( } if (auto *Expr = dyn_cast<ConditionalOperator>(E)) { return tryToFindPtrOrigin(Expr->getTrueExpr(), StopAtFirstRefCountedObj, - isSafePtr, isSafePtrType, callback) && + isSafePtr, isSafePtrType, isSafeGlobalDecl, + 
callback) && tryToFindPtrOrigin(Expr->getFalseExpr(), StopAtFirstRefCountedObj, - isSafePtr, isSafePtrType, callback); + isSafePtr, isSafePtrType, isSafeGlobalDecl, + callback); } if (auto *cast = dyn_cast<CastExpr>(E)) { if (StopAtFirstRefCountedObj) { @@ -93,7 +98,8 @@ bool tryToFindPtrOrigin( if (auto *call = dyn_cast<CallExpr>(E)) { if (auto *Callee = call->getCalleeDecl()) { if (Callee->hasAttr<CFReturnsRetainedAttr>() || - Callee->hasAttr<NSReturnsRetainedAttr>()) { + Callee->hasAttr<NSReturnsRetainedAttr>() || + Callee->hasAttr<NSReturnsAutoreleasedAttr>()) { return callback(E, true); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h index 3a009d6..9fff456 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h @@ -56,6 +56,7 @@ bool tryToFindPtrOrigin( const clang::Expr *E, bool StopAtFirstRefCountedObj, std::function<bool(const clang::CXXRecordDecl *)> isSafePtr, std::function<bool(const clang::QualType)> isSafePtrType, + std::function<bool(const clang::Decl *)> isSafeGlobalDecl, std::function<bool(const clang::Expr *, bool)> callback); /// For \p E referring to a ref-countable/-counted pointer/reference we return diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp index 9585ceb..791e709 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp @@ -29,12 +29,12 @@ namespace { class RawPtrRefCallArgsChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { BugType Bug; - mutable BugReporter *BR; TrivialFunctionAnalysis TFA; EnsureFunctionAnalysis EFA; protected: + mutable BugReporter *BR; mutable std::optional<RetainTypeChecker> RTC; public: @@ -46,6 +46,7 @@ public: virtual bool isSafePtr(const CXXRecordDecl *Record) const = 0; virtual bool isSafePtrType(const QualType type) const = 0; virtual bool isSafeExpr(const Expr *) const { return false; } + virtual bool isSafeDecl(const Decl *) const { return false; } virtual const char *ptrKind() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -214,6 +215,7 @@ public: Arg, /*StopAtFirstRefCountedObj=*/true, [&](const clang::CXXRecordDecl *Record) { return isSafePtr(Record); }, [&](const clang::QualType T) { return isSafePtrType(T); }, + [&](const clang::Decl *D) { return isSafeDecl(D); }, [&](const clang::Expr *ArgOrigin, bool IsSafe) { if (IsSafe) return true; @@ -479,6 +481,11 @@ public: isa<ObjCMessageExpr>(E); } + bool isSafeDecl(const Decl *D) const final { + // Treat NS/CF globals in system headers as immortal.
+ return BR->getSourceManager().isInSystemHeader(D->getLocation()); + } + const char *ptrKind() const final { return "unretained"; } }; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp index dd9701f..c13df479 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp @@ -166,10 +166,10 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, class RawPtrRefLocalVarsChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { BugType Bug; - mutable BugReporter *BR; EnsureFunctionAnalysis EFA; protected: + mutable BugReporter *BR; mutable std::optional<RetainTypeChecker> RTC; public: @@ -180,6 +180,7 @@ public: virtual bool isSafePtr(const CXXRecordDecl *) const = 0; virtual bool isSafePtrType(const QualType) const = 0; virtual bool isSafeExpr(const Expr *) const { return false; } + virtual bool isSafeDecl(const Decl *) const { return false; } virtual const char *ptrKind() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -288,6 +289,7 @@ public: return isSafePtr(Record); }, [&](const clang::QualType Type) { return isSafePtrType(Type); }, + [&](const clang::Decl *D) { return isSafeDecl(D); }, [&](const clang::Expr *InitArgOrigin, bool IsSafe) { if (!InitArgOrigin || IsSafe) return true; @@ -443,6 +445,10 @@ public: return ento::cocoa::isCocoaObjectRef(E->getType()) && isa<ObjCMessageExpr>(E); } + bool isSafeDecl(const Decl *D) const final { + // Treat NS/CF globals in system headers as immortal. + return BR->getSourceManager().isInSystemHeader(D->getLocation()); + } const char *ptrKind() const final { return "unretained"; } }; diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 84a9c43..6108931 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -1111,6 +1111,10 @@ SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, assert(!BinaryOperator::isComparisonOp(op) && "arguments to comparison ops must be of the same type"); + SVal simplifiedRhs = simplifySVal(state, rhs); + if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) + rhs = *simplifiedRhsAsNonLoc; + // Special case: rhs is a zero constant.
if (rhs.isZeroConstant()) return lhs; diff --git a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h index 1e44de8..d55b3ab 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h @@ -34,6 +34,8 @@ void os_log_msg(os_log_t oslog, os_log_type_t type, const char *msg, ...); typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStringRef; +extern CFStringRef const kCFURLTagNamesKey; + #ifdef __OBJC__ @class NSString; @interface SystemObject { @@ -41,4 +43,8 @@ typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStrin CFStringRef cf_string; } @end + +typedef NSString *NSNotificationName; +extern "C" NSNotificationName NSApplicationDidBecomeActiveNotification; + #endif diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm index a517dbc..5dc3b38 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm @@ -567,6 +567,17 @@ struct Derived : Base { } // namespace ns_retained_return_value +namespace autoreleased { + +NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased)); +void consume(NSString *); + +void foo() { + consume(provideAutoreleased()); +} + +} // autoreleased + @interface TestObject : NSObject - (void)doWork:(NSString *)msg, ...; - (void)doWorkOnSelf; diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm index 307a4d03..f49e7bd 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm @@ -1,8 +1,11 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedLocalVarsChecker -verify %s #import "objc-mock-types.h" +#import "mock-system-header.h" void someFunction(); +extern "C" CFStringRef LocalGlobalCFString; +extern "C" NSString *LocalGlobalNSString; namespace raw_ptr { void foo() { @@ -535,6 +538,41 @@ unsigned foo() { } // namespace ns_retained_return_value +namespace autoreleased { + +NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased)); +void consume(NSString *); + +void foo() { + auto *string = provideAutoreleased(); + consume(string); +} + +} // autoreleased + +namespace ns_global { + +void consumeCFString(CFStringRef); +void consumeNSString(NSString *); + +void cf() { + auto *str = kCFURLTagNamesKey; + consumeCFString(str); + auto *localStr = LocalGlobalCFString; + // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}} + consumeCFString(localStr); +} + +void ns() { + auto *str = NSApplicationDidBecomeActiveNotification; + consumeNSString(str); + auto *localStr = LocalGlobalNSString; + // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}} + consumeNSString(localStr); +} + +} + bool doMoreWorkOpaque(OtherObj*); SomeObj* provide(); diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm new file mode 100644 index 0000000..5c78b21 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm @@ -0,0 +1,18 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedCallArgsChecker -verify %s + +#import 
"mock-types.h" +#import "mock-system-header.h" + +void consumeCFString(CFStringRef); +extern "C" CFStringRef LocalGlobalCFString; +void consumeNSString(NSString *); +extern "C" NSString *LocalGlobalNSString; + +void foo() { + consumeCFString(kCFURLTagNamesKey); + consumeCFString(LocalGlobalCFString); + // expected-warning@-1{{Call argument is unretained and unsafe}} + consumeNSString(NSApplicationDidBecomeActiveNotification); + consumeNSString(LocalGlobalNSString); + // expected-warning@-1{{Call argument is unretained and unsafe}} +} diff --git a/clang/test/Analysis/loc-folding.cpp b/clang/test/Analysis/loc-folding.cpp new file mode 100644 index 0000000..1fcb066 --- /dev/null +++ b/clang/test/Analysis/loc-folding.cpp @@ -0,0 +1,61 @@ +// RUN: %clang_analyze_cc1 -verify %s -analyzer-config eagerly-assume=false \ +// RUN: -analyzer-checker=core,debug.ExprInspection + +void clang_analyzer_eval(bool); + +void element_constant() { + char arr[10]; + clang_analyzer_eval(arr + 1 > arr); // expected-warning{{TRUE}} +} + +void element_known() { + char arr[10]; + int off = 1; + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} +} + +void element_constrained(int off) { + char arr[10]; + if (off == 1) { + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} + } +} + +void element_unknown(int off) { + char arr[10]; + clang_analyzer_eval(arr + off > arr); // expected-warning{{UNKNOWN}} +} + +void element_complex(int off) { + char arr[10]; + int comp = off * 2; + if (off == 1) { + clang_analyzer_eval(arr + comp); // expected-warning{{TRUE}} + } +} + +void base_constant(int *arr) { + clang_analyzer_eval(arr + 1 > arr); // expected-warning{{TRUE}} +} + +void base_known(int *arr) { + int off = 1; + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} +} + +void base_constrained(int *arr, int off) { + if (off == 1) { + clang_analyzer_eval(arr + off > arr); // expected-warning{{TRUE}} + } +} + +void base_unknown(int *arr, int off) { + clang_analyzer_eval(arr + off > arr); // expected-warning{{UNKNOWN}} +} + +void base_complex(int *arr, int off) { + int comp = off * 2; + if (off == 1) { + clang_analyzer_eval(arr + comp > arr); // expected-warning{{TRUE}} + } +} diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c index 3748375..113ecf7 100644 --- a/clang/test/C/C23/n3037.c +++ b/clang/test/C/C23/n3037.c @@ -30,11 +30,24 @@ void func2(PRODUCT(int, SUM(float, double)) y) { // c17-warning {{declaration of struct foop { struct { int x; }; }; // c17-note {{previous definition is here}} struct foop { struct { int x; }; }; // c17-error {{redefinition of 'foop'}} +// Test the field lookup compatibility isn't sufficient, the structure of types should be compatible. 
+struct AnonymousStructNotMatchingFields { // c17-note {{previous definition is here}} + struct { // c23-note {{field has name '' here}} + int x; + }; +}; +struct AnonymousStructNotMatchingFields { // c23-error {{type 'struct AnonymousStructNotMatchingFields' has incompatible definitions}} \ + c17-error {{redefinition of 'AnonymousStructNotMatchingFields'}} + int x; // c23-note {{field has name 'x' here}} +}; + union barp { int x; float y; }; // c17-note {{previous definition is here}} union barp { int x; float y; }; // c17-error {{redefinition of 'barp'}} typedef struct q { int x; } q_t; // c17-note 2 {{previous definition is here}} typedef struct q { int x; } q_t; // c17-error {{redefinition of 'q'}} \ c17-error-re {{typedef redefinition with different types ('struct (unnamed struct at {{.*}})' vs 'struct q')}} +typedef struct { int x; } untagged_q_t; // both-note {{previous definition is here}} +typedef struct { int x; } untagged_q_t; // both-error {{typedef redefinition with different types}} void func3(void) { struct S { int x; }; // c17-note {{previous definition is here}} struct T { struct S s; }; // c17-note {{previous definition is here}} @@ -389,13 +402,40 @@ void nontag_both_in_params(struct { int i; } Arg1, struct { int i; } Arg2) { _Static_assert(0 == _Generic(__typeof__(Arg1), __typeof__(Arg2) : 1, default : 0)); // both-warning {{passing a type argument as the first operand to '_Generic' is a C2y extension}} } -struct InnerAnonStruct { +struct InnerUnnamedStruct { struct { int i; } untagged; -} inner_anon_tagged; +} inner_unnamed_tagged; +_Static_assert(0 == _Generic(inner_unnamed_tagged.untagged, struct { int i; } : 1, default : 0)); -_Static_assert(0 == _Generic(inner_anon_tagged.untagged, struct { int i; } : 1, default : 0)); +struct InnerUnnamedStruct_same { + struct { + int i; + } untagged; +}; +struct InnerUnnamedStruct_differentNaming { + struct { + int i; + } untaggedDifferent; +}; +struct InnerUnnamedStruct_differentShape { + float x; + struct { + int i; + } untagged; + int y; +}; +void compare_unnamed_struct_from_different_outer_type( + struct InnerUnnamedStruct sameOuterType, + struct InnerUnnamedStruct_same matchingType, + struct InnerUnnamedStruct_differentNaming differentFieldName, + struct InnerUnnamedStruct_differentShape differentType) { + inner_unnamed_tagged.untagged = sameOuterType.untagged; + inner_unnamed_tagged.untagged = matchingType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} + inner_unnamed_tagged.untagged = differentFieldName.untaggedDifferent; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} + inner_unnamed_tagged.untagged = differentType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} +} // Test the same thing with enumerations (test for unions is omitted because // unions and structures are both RecordDecl objects, whereas EnumDecl is not). 
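The n3037.c hunks above exercise C23's N3037 rule: repeated definitions of the same tagged type in one translation unit are compatible only when their member lists correspond structurally, member for member, in name and type; field-name lookup producing the same result is not enough, so an anonymous inner struct never matches a bare field. A minimal sketch of the rule follows; it is illustrative only, not part of the patch, and the type names are made up:

/* Accepted in C23 (N3037): the second definition repeats the first
   member for member, so the two definitions are compatible.
   C17 rejects this as a redefinition. */
struct pair { int a; int b; };
struct pair { int a; int b; };

/* Rejected even in C23: 'w.x' names an int under both definitions,
   but the first wraps 'x' in an anonymous struct member while the
   second declares it directly, so the member structure differs. */
struct wrapped { struct { int x; }; };
struct wrapped { int x; }; /* error: incompatible definitions */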
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c index 76289c5..440010a 100644 --- a/clang/test/CIR/CodeGen/atomic.c +++ b/clang/test/CIR/CodeGen/atomic.c @@ -211,7 +211,7 @@ void c11_atomic_cmpxchg_strong(_Atomic(int) *ptr, int *expected, int desired) { __c11_atomic_compare_exchange_strong(ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -249,7 +249,7 @@ void c11_atomic_cmpxchg_weak(_Atomic(int) *ptr, int *expected, int desired) { __c11_atomic_compare_exchange_weak(ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) weak : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -286,7 +286,7 @@ void atomic_cmpxchg(int *ptr, int *expected, int *desired) { // OGCG-LABEL: @atomic_cmpxchg __atomic_compare_exchange(ptr, expected, desired, /*weak=*/0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -317,7 +317,7 @@ void atomic_cmpxchg(int *ptr, int *expected, int *desired) { // OGCG-NEXT: store i8 %[[SUCCESS_2]], ptr %{{.+}}, align 1 __atomic_compare_exchange(ptr, expected, desired, /*weak=*/1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) weak : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -354,7 +354,7 @@ void atomic_cmpxchg_n(int *ptr, int *expected, int desired) { // OGCG-LABEL: @atomic_cmpxchg_n __atomic_compare_exchange_n(ptr, expected, desired, /*weak=*/0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], 
%[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -385,7 +385,7 @@ void atomic_cmpxchg_n(int *ptr, int *expected, int desired) { // OGCG-NEXT: store i8 %[[SUCCESS_2]], ptr %{{.+}}, align 1 __atomic_compare_exchange_n(ptr, expected, desired, /*weak=*/1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg(%{{.+}} : !cir.ptr<!s32i>, %{{.+}} : !s32i, %{{.+}} : !s32i, success = seq_cst, failure = acquire) align(4) weak : (!s32i, !cir.bool) + // CIR: %[[OLD:.+]], %[[SUCCESS:.+]] = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} align(4) : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) // CIR-NEXT: %[[FAILED:.+]] = cir.unary(not, %[[SUCCESS]]) : !cir.bool, !cir.bool // CIR-NEXT: cir.if %[[FAILED]] { // CIR-NEXT: cir.store align(4) %[[OLD]], %{{.+}} : !s32i, !cir.ptr<!s32i> @@ -427,12 +427,12 @@ void c11_atomic_exchange(_Atomic(int) *ptr, int value) { __c11_atomic_exchange(ptr, value, __ATOMIC_RELEASE); __c11_atomic_exchange(ptr, value, __ATOMIC_ACQ_REL); __c11_atomic_exchange(ptr, value, __ATOMIC_SEQ_CST); - // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} monotonic, align 4 // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} acquire, align 4 @@ -460,12 +460,12 @@ void atomic_exchange(int *ptr, int *value, int *old) { __atomic_exchange(ptr, value, old, __ATOMIC_RELEASE); __atomic_exchange(ptr, value, old, __ATOMIC_ACQ_REL); __atomic_exchange(ptr, value, old, __ATOMIC_SEQ_CST); - // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: 
%{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} monotonic, align 4 // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} acquire, align 4 @@ -493,12 +493,12 @@ void atomic_exchange_n(int *ptr, int value) { __atomic_exchange_n(ptr, value, __ATOMIC_RELEASE); __atomic_exchange_n(ptr, value, __ATOMIC_ACQ_REL); __atomic_exchange_n(ptr, value, __ATOMIC_SEQ_CST); - // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i - // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i + // CIR: %{{.+}} = cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} monotonic, align 4 // LLVM: %{{.+}} = atomicrmw xchg ptr %{{.+}}, i32 %{{.+}} acquire, align 4 diff --git a/clang/test/CIR/CodeGen/goto.cpp b/clang/test/CIR/CodeGen/goto.cpp index 48cb44e..257c255 100644 --- a/clang/test/CIR/CodeGen/goto.cpp +++ b/clang/test/CIR/CodeGen/goto.cpp @@ -205,6 +205,8 @@ extern "C" void case_follow_label(int v) { // CIR: cir.func dso_local @case_follow_label // CIR: cir.switch // CIR: cir.case(equal, [#cir.int<1> : !s32i]) { +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "label" // CIR: cir.case(equal, [#cir.int<2> : !s32i]) { // CIR: cir.call @action1() @@ -215,9 +217,11 @@ extern "C" void case_follow_label(int v) { // LLVM: define dso_local void @case_follow_label // LLVM: switch i32 {{.*}}, label %[[SWDEFAULT:.*]] [ -// LLVM: i32 1, label %[[LABEL:.*]] +// LLVM: i32 1, label %[[CASE1:.*]] // LLVM: i32 2, label %[[CASE2:.*]] // LLVM: ] +// LLVM: [[CASE1]]: +// LLVM: br label %[[LABEL:.*]] // LLVM: [[LABEL]]: // LLVM: br label %[[CASE2]] // LLVM: [[CASE2]]: @@ -303,3 +307,24 @@ extern "C" void default_follow_label(int v) { // OGCG: br label %label // OGCG: sw.epilog: // OGCG: ret void + +void g3() { +label: + goto label; +} + +// CIR: cir.func dso_local @_Z2g3v +// CIR: cir.br ^bb1 +// CIR: ^bb1: +// CIR: cir.label "label" +// CIR: cir.goto "label" + +// LLVM: define dso_local void @_Z2g3v() 
+// LLVM: br label %1 +// LLVM: 1: +// LLVM: br label %1 + +// OGCG: define dso_local void @_Z2g3v() +// OGCG: br label %label +// OGCG: label: +// OGCG: br label %label diff --git a/clang/test/CIR/CodeGen/label.c b/clang/test/CIR/CodeGen/label.c index a050094..f5345ef 100644 --- a/clang/test/CIR/CodeGen/label.c +++ b/clang/test/CIR/CodeGen/label.c @@ -11,10 +11,14 @@ labelA: } // CIR: cir.func no_proto dso_local @label +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "labelA" // CIR: cir.return // LLVM:define dso_local void @label +// LLVM: br label %1 +// LLVM: 1: // LLVM: ret void // OGCG: define dso_local void @label @@ -29,15 +33,19 @@ labelC: } // CIR: cir.func no_proto dso_local @multiple_labels -// CIR: cir.label "labelB" // CIR: cir.br ^bb1 -// CIR: ^bb1: // pred: ^bb0 +// CIR: ^bb1: +// CIR: cir.label "labelB" +// CIR: cir.br ^bb2 +// CIR: ^bb2: // CIR: cir.label "labelC" // CIR: cir.return // LLVM: define dso_local void @multiple_labels() // LLVM: br label %1 // LLVM: 1: +// LLVM: br label %2 +// LLVM: 2: // LLVM: ret void // OGCG: define dso_local void @multiple_labels @@ -56,6 +64,8 @@ labelD: // CIR: cir.func dso_local @label_in_if // CIR: cir.if {{.*}} { +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "labelD" // CIR: [[LOAD:%.*]] = cir.load align(4) [[COND:%.*]] : !cir.ptr<!s32i>, !s32i // CIR: [[INC:%.*]] = cir.unary(inc, %3) nsw : !s32i, !s32i @@ -68,15 +78,17 @@ labelD: // LLVM: 3: // LLVM: [[LOAD:%.*]] = load i32, ptr [[COND:%.*]], align 4 // LLVM: [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0 -// LLVM: br i1 [[CMP]], label %6, label %9 +// LLVM: br i1 [[CMP]], label %6, label %10 // LLVM: 6: +// LLVM: br label %7 +// LLVM: 7: // LLVM: [[LOAD2:%.*]] = load i32, ptr [[COND]], align 4 // LLVM: [[ADD1:%.*]] = add nsw i32 [[LOAD2]], 1 // LLVM: store i32 [[ADD1]], ptr [[COND]], align 4 -// LLVM: br label %9 -// LLVM: 9: // LLVM: br label %10 // LLVM: 10: +// LLVM: br label %11 +// LLVM: 11: // LLVM: ret void // OGCG: define dso_local void @label_in_if @@ -142,11 +154,15 @@ end: return; } // CIR: cir.func no_proto dso_local @labelWithoutMatch +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "end" // CIR: cir.return // CIR: } // LLVM: define dso_local void @labelWithoutMatch +// LLVM: br label %1 +// LLVM: 1: // LLVM: ret void // OGCG: define dso_local void @labelWithoutMatch @@ -167,13 +183,17 @@ void foo() { // CIR: cir.func no_proto dso_local @foo // CIR: cir.scope { -// CIR: cir.label "label" // CIR: %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["agg.tmp0"] +// CIR: cir.br ^bb1 +// CIR: ^bb1: +// CIR: cir.label "label" // LLVM:define dso_local void @foo() { // LLVM: [[ALLOC:%.*]] = alloca %struct.S, i64 1, align 1 // LLVM: br label %2 // LLVM:2: +// LLVM: br label %3 +// LLVM:3: // LLVM: [[CALL:%.*]] = call %struct.S @get() // LLVM: store %struct.S [[CALL]], ptr [[ALLOC]], align 1 // LLVM: [[LOAD:%.*]] = load %struct.S, ptr [[ALLOC]], align 1 diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp index 040ddd3..ee4fffe 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir 
-fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct DefaultOperators { int i; @@ -43,12 +43,43 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast 
bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(*:someVar) - // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -71,10 +102,42 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: 
%[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(max:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -101,7 +164,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(min:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -128,7 +191,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc parallel loop reduction(&:someVarNoFloats) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) @@ -146,7 +209,27 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] 
#pragma acc parallel loop reduction(max:someVar)
// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -101,7 +164,7 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(min:someVar)
// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <min> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -128,7 +191,7 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(&:someVarNoFloats)
// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}})
@@ -146,7 +209,27 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>
// CHECK-NEXT: }
  for(int i = 0; i < 5; ++i);
@@ -167,7 +250,27 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>
// CHECK-NEXT: }
  for(int i = 0; i < 5; ++i);
@@ -188,10 +291,30 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(&&:someVar)
// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -218,7 +341,7 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(||:someVar)
// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <lor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -245,7 +368,7 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(+:someVarArr)
// CHECK-NEXT: acc.reduction.recipe @reduction_add__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init {
@@ -286,10 +409,65 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
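Note: for array-typed reduction variables the new combiner wraps the same per-field combination in an index loop. An "itr" slot is zero-initialized, the cir.for runs while itr < 5 (the array bound), and each element is addressed by decaying the array to a pointer and striding by the index. In C++ terms the checked IR computes roughly the following (DefaultOperators is the struct from the earlier sketch; the combineAdd helper is hypothetical, not part of the real codegen):

    // Illustrative sketch only: the element-wise <add> combiner.
    struct DefaultOperators { int i; unsigned u; float f; double d; bool b; };

    static void combineAdd(DefaultOperators &lhs, const DefaultOperators &rhs) {
      lhs.i += rhs.i; lhs.u += rhs.u; lhs.f += rhs.f; lhs.d += rhs.d;
      lhs.b = static_cast<bool>(int(lhs.b) + int(rhs.b)); // bool via int promotion
    }

    static void combineAddArray(DefaultOperators (&lhs)[5],
                                const DefaultOperators (&rhs)[5]) {
      for (long itr = 0; itr < 5; ++itr) {
        // &lhs[0] + itr mirrors cir.cast array_to_ptrdecay + cir.ptr_stride.
        combineAdd(*(&lhs[0] + itr), *(&rhs[0] + itr));
      }
    }

The loop bound 5 is baked in because the recipe is specialized on the array type (ZTSA5_16DefaultOperators).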
#pragma acc parallel loop reduction(*:someVarArr)
// CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
@@ -388,11 +566,67 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(max:someVarArr)
+
// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
@@ -493,7 +727,7 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(min:someVarArr)
// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
@@ -664,10 +898,53 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
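Note: each of these loop-based combiners prints as a single cir.for carrying three regions, and the regions line up one-to-one with the pieces of a canonical C++ for statement; the cir.alloca/cir.store emitted just before the cir.for play the role of the init clause. A sketch of the mapping (illustrative only):

    // How the cir.for regions in the recipes above map onto C++.
    void forShape() {
      long itr = 0;    // init: cir.alloca "itr" + cir.store of the 0 constant
      for (; itr < 5;  // "cond" region: cir.load, cir.cmp(lt), cir.condition
             ++itr) {  // "step" region: cir.load, cir.unary(inc), cir.store
        ;              // "body" region: the per-element combine, then cir.yield
      }
    }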
#pragma acc parallel loop reduction(|:someVarArrNoFloats)
// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}})
@@ -702,7 +979,50 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
  for(int i = 0; i < 5; ++i);
@@ -739,10 +1059,53 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(&&:someVarArr)
// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
@@ -844,9 +1207,9 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(||:someVarArr)
-// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init {
+// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
@@ -888,7 +1251,7 @@ void acc_combined() {
// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(+:someVarArr[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init {
@@ -935,9 +1298,67 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
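Note: when the reduction clause names an array section such as someVarArr[2], the recipe gains a trailing !acc.data_bounds_ty block argument and the combiner is wrapped in a cir.scope. Instead of the fixed 0..5 bounds, the loop runs from acc.get_lowerbound to acc.get_upperbound, with both index values converted to !u64i through builtin.unrealized_conversion_cast. A sketch of the semantics, with lb/ub standing in for the two bounds queries and reusing the hypothetical DefaultOperators/combineAdd helpers from the earlier notes (illustrative only):

    // Illustrative sketch only: combiner for an array section, walking the
    // half-open range [lb, ub) taken from the acc.data_bounds_ty argument.
    void combineAddSection(DefaultOperators (&lhs)[5],
                           const DefaultOperators (&rhs)[5],
                           unsigned long lb, unsigned long ub) {
      for (unsigned long iter = lb; iter < ub; ++iter) // the "iter" slot in the IR
        combineAdd(*(&lhs[0] + iter), *(&rhs[0] + iter));
    }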
#pragma acc parallel loop reduction(*:someVarArr[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -983,9 +1404,67 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(max:someVarArr[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1033,7 +1512,7 @@ void acc_combined() {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(min:someVarArr[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_min__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1081,7 +1560,7 @@ void acc_combined() {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(&:someVarArrNoFloats[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1121,9 +1600,55 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(|:someVarArrNoFloats[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1163,9 +1688,55 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(^:someVarArrNoFloats[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1205,9 +1776,55 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(&&:someVarArr[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1255,7 +1872,7 @@ void acc_combined() {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(||:someVarArr[2])
// CHECK-NEXT: acc.reduction.recipe @reduction_lor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
@@ -1303,26 +1920,26 @@ void acc_combined() {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(+:someVarArr[1:1])
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(*:someVarArr[1:1])
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(max:someVarArr[1:1])
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(min:someVarArr[1:1])
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(&:someVarArrNoFloats[1:1])
  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(|:someVarArrNoFloats[1:1])
  for(int i = 0; i < 5; ++i);
#pragma acc parallel loop reduction(^:someVarArrNoFloats[1:1])
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; 
 #pragma acc parallel loop reduction(&&:someVarArr[1:1])
- for(int i=0;i < 5; ++i);
+ for(int i = 0; i < 5; ++i);
 #pragma acc parallel loop reduction(||:someVarArr[1:1])
- for(int i=0;i < 5; ++i);
+ for(int i = 0; i < 5; ++i);
 // CHECK-NEXT: cir.func {{.*}}@_Z12acc_combined
 }
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp
index 6e885cc..472e4ac 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 template<typename T>
 void acc_combined() {
 T someVar;
@@ -13,7 +13,10 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -26,7 +29,10 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float>
 // CHECK-NEXT: }
@@ -111,7 +117,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -142,7 +172,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -302,6 +356,35 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -337,6 +420,35 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -372,6 +484,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -407,6 +520,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -442,6 +556,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -477,6 +592,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp
index 3d46ac7..112ff656 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 struct HasOperatorsInline {
 int i;
@@ -48,7 +48,7 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -79,7 +79,7 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -88,7 +88,7 @@ void acc_combined() {
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
 #pragma acc parallel loop reduction(max:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init {
+// CHECK: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -172,7 +172,7 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -181,7 +181,7 @@ void acc_combined() {
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
 #pragma acc parallel loop reduction(|:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init {
+// CHECK: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -203,7 +203,7 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -212,7 +212,7 @@ void acc_combined() {
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
 #pragma acc parallel loop reduction(^:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init {
+// CHECK: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -234,7 +234,7 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -243,7 +243,7 @@ void acc_combined() {
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
 #pragma acc parallel loop reduction(&&:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
+// CHECK: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -344,7 +344,29 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+//
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -466,7 +488,28 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -832,7 +875,28 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -896,7 +960,28 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -959,7 +1044,28 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -1216,6 +1322,32 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1293,6 +1425,32 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1524,6 +1682,32 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1601,6 +1785,32 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1678,6 +1888,32 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp
index e9669d3..7eaa822b 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 template<typename T>
 void acc_combined() {
@@ -14,7 +14,10 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -28,7 +31,10 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -67,7 +73,10 @@ void acc_combined() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -81,7 +90,10 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -95,7 +107,10 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
 // CHECK-NEXT: }
@@ -155,7 +170,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -186,7 +225,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -279,7 +342,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -309,7 +396,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
 // CHECK-NEXT: }
 for(int i=0;i < 5; ++i);
@@ -339,7 +450,31 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay
%[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -437,6 +572,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -472,6 +636,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -507,6 +700,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -542,6 +736,7 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -577,6 +772,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// 
CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -612,6 +836,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride 
%[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -647,6 +900,35 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } 
for(int i=0;i < 5; ++i);
@@ -682,6 +964,7 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -717,6 +1000,7 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp
index c99e3c1..c2c0c77 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct HasOperatorsOutline {
  int i;
  unsigned u;
@@ -47,7 +47,7 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -55,7 +55,6 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
-
#pragma acc parallel loop reduction(*:someVar)
// CHECK: acc.reduction.recipe @reduction_mul__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <mul> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
@@ -78,17 +77,17 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
//
// CHECK-NEXT: } combiner {
-  for(int i=0;i < 5; ++i);
-#pragma acc parallel loop reduction(max:someVar)
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasOperatorsOutline>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
-// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init {
+  for(int i=0;i < 5; ++i);
+#pragma acc parallel loop reduction(max:someVar)
+// CHECK: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -172,7 +171,7 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -181,7 +180,7 @@ void acc_combined() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(|:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init {
+// CHECK: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -203,7 +202,7 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -212,7 +211,7 @@ void acc_combined() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(^:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init {
+// CHECK: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -234,7 +233,7 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -243,7 +242,7 @@ void acc_combined() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(&&:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init {
+// CHECK: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -344,7 +343,29 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):
@@ -466,7 +487,28 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):
@@ -832,7 +874,28 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):
@@ -896,7 +959,28 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):
@@ -959,7 +1043,28 @@ void acc_combined() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):
@@ -982,6 +1087,7 @@ void acc_combined() {
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
+  for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(&&:someVarArr)
// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init {
@@ -1106,8 +1212,7 @@ void acc_combined() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(||:someVarArr)
-
-// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init {
+// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
@@ -1216,6 +1321,32 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1293,6 +1424,32 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1524,6 +1681,32 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1601,6 +1784,32 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1678,6 +1887,32 @@ void acc_combined() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c
index c99dc09..b439623 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -std=c23 -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -std=c23 -triple x86_64-linux-pc %s -o - | FileCheck %s

struct DefaultOperators {
  int i;
@@ -42,7 +42,39 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
// CHECK-NEXT: }
;
@@ -69,7 +101,39 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
// CHECK-NEXT: }
;
@@ -144,7 +208,27 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -165,7 +249,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -186,7 +290,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -271,7 +395,62 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : 
!cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -373,7 +552,62 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: 
%[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -650,7 +884,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} 
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load 
%[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -680,7 +957,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> 
!cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -710,7 +1030,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -892,6 +1255,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = 
cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : 
!cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -940,6 +1361,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: 
%[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -1056,7 +1535,6 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i - // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : 
(!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> @@ -1079,6 +1557,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1121,6 +1645,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -1163,6 +1733,52 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : 
!u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp index b90a2fc..f997902 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct DefaultOperators { int i; @@ -21,7 +21,6 @@ void acc_compute() { struct DefaultOperatorsNoFloats someVarNoFloats; struct DefaultOperatorsNoFloats someVarArrNoFloats[5]; #pragma acc parallel reduction(+:someVar) - ; // CHECK: acc.reduction.recipe @reduction_add__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -44,11 +43,43 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member 
%[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } + ; #pragma acc parallel reduction(*:someVar) - // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -71,7 +102,39 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, 
!cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; @@ -146,7 +209,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -167,7 +250,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = 
cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -188,7 +291,27 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> // CHECK-NEXT: } ; @@ -286,7 +409,62 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = 
cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = 
cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -388,11 +566,67 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} 
%[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: 
cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; #pragma acc parallel reduction(max:someVarArr) + // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] @@ -664,7 +898,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: 
%[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -702,7 +979,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] 
= cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -739,7 +1059,50 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay 
%[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } ; @@ -846,7 +1209,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(||:someVarArr) -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca 
!cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] @@ -935,6 +1298,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: 
%[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; @@ -983,6 +1404,64 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = 
cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
// CHECK-NEXT: }
;
@@ -1121,6 +1600,52 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
;
@@ -1163,6 +1688,52 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
;
@@ -1205,6 +1776,52 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
// CHECK-NEXT: }
;
@@ -1323,9 +1940,11 @@ void acc_compute() {
;
#pragma acc parallel reduction(||:someVarArr[1:1])
;
+ // CHECK-NEXT: cir.func {{.*}}@_Z11acc_compute
}
void uses() { acc_compute<DefaultOperators>(); }
+
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c
index 0f7fd84..3e4583f 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
void acc_compute() {
float someVar;
@@ -13,7 +13,10 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float>
// CHECK-NEXT: }
;
@@ -26,7 +29,10 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float>
// CHECK-NEXT: }
;
@@ -110,7 +116,31 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -141,7 +171,31 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -301,6 +355,35 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -336,6 +419,35 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -371,6 +483,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -406,6 +519,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -441,6 +555,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -476,6 +591,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp
index 4d99a43..833cfad 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
template<typename T>
void acc_compute() {
@@ -14,7 +14,10 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float>
// CHECK-NEXT: }
;
@@ -27,7 +30,10 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float>
// CHECK-NEXT: }
@@ -112,7 +118,31 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -143,7 +173,31 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -303,6 +357,35 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -338,6 +421,35 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -373,6 +485,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -443,6 +556,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
@@ -478,6 +592,7 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>>
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp
index ea00c07..ec4372d 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct HasOperatorsInline {
int i;
@@ -48,7 +48,7 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -79,7 +79,7 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -88,7 +88,7 @@ void acc_compute() {
// CHECK-NEXT: }
;
#pragma acc parallel reduction(max:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init {
+// CHECK: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -172,7 +172,7 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -181,7 +181,7 @@ void acc_compute() {
// CHECK-NEXT: }
;
#pragma acc parallel reduction(|:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init {
+// CHECK: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -203,7 +203,7 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -212,7 +212,7 @@ void acc_compute() {
// CHECK-NEXT: }
;
#pragma acc parallel reduction(^:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init {
+// CHECK: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -234,7 +234,7 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -243,7 +243,7 @@ void acc_compute() {
// CHECK-NEXT: }
;
#pragma acc parallel reduction(&&:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
+// CHECK: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -344,7 +344,29 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -466,7 +488,28 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -832,7 +875,28 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -896,7 +960,28 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -959,7 +1044,28 @@ void acc_compute() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -1216,6 +1322,32 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1293,6 +1425,32 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1524,6 +1682,32 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1601,6 +1785,32 @@ void acc_compute() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] =
cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1678,6 +1888,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c index b170ed0bf..0cee5c6 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c +++ 
b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s void acc_compute() { int someVar; @@ -13,7 +13,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -27,7 +30,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -66,7 +72,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -80,7 +89,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -94,9 +106,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } + ; #pragma acc parallel reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe 
@reduction_land__ZTSi : !cir.ptr<!s32i> reduction_operator <land> init { @@ -153,7 +169,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -184,7 +224,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride 
%[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -277,7 +341,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -307,7 +395,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i 
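// The fixed-extent combiners above loop a compiler-generated iterator from
// 0 to 5, combining one element per trip. A minimal C sketch of the `*`
// combiner's semantics (`combine_mul`, `lhs`, and `rhs` are illustrative
// names, not identifiers the compiler emits):
//
//   void combine_mul(int lhs[5], int rhs[5]) {
//     for (long i = 0; i < 5; ++i)      // cir.for with cond cmp(lt, i, 5)
//       lhs[i] = lhs[i] * rhs[i];       // cir.binop(mul, ...) nsw + cir.store
//   }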
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -337,7 +449,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// 
CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -435,6 +571,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -470,6 +635,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: 
%[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -505,6 +699,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -540,6 +735,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -575,6 +771,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast 
array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -610,6 +835,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } 
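// When the reduction names an array section, the recipe gains an
// acc.data_bounds_ty block argument and the checked loop runs from
// acc.get_lowerbound to acc.get_upperbound instead of over the full extent.
// A hedged C sketch of the `|` combiner above (`lb`/`ub` stand in for the
// bounds values; the helper name is illustrative):
//
//   void combine_or(int *lhs, int *rhs, unsigned long lb, unsigned long ub) {
//     for (unsigned long i = lb; i < ub; ++i)  // cir.for, cmp(lt, i, ub)
//       lhs[i] = lhs[i] | rhs[i];              // cir.binop(or, ...) + store
//   }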
+// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -645,6 +899,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -680,6 +963,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -715,6 +999,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp index c678eae..822dd9f6 
100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s template<typename T> void acc_compute() { @@ -14,7 +14,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -28,7 +31,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -67,7 +73,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -81,7 +90,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: } ; @@ -95,7 +107,10 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i> +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i> // CHECK-NEXT: 
} @@ -155,7 +170,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -186,7 +225,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> 
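// The templated C++ test expects the same recipes once acc_compute is
// instantiated for int (the !s32i element type above). As a rough sketch of
// the `+` combiner checked here (illustrative helper name only):
//
//   void combine_add(int lhs[5], int rhs[5]) {
//     for (long i = 0; i < 5; ++i)      // cir.for counting 0..5
//       lhs[i] = lhs[i] + rhs[i];       // cir.binop(add, ...) nsw + cir.store
//   }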
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -279,7 +342,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -309,7 +396,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// 
CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -339,7 +450,31 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield 
%[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -437,6 +572,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -472,6 +636,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, 
!cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -507,6 +700,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -542,6 +736,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -577,6 +772,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> 
!cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -612,6 +836,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -647,6 +900,35 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -682,6 +964,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; @@ -717,6 +1000,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp index 9ccaea2..873bf51 100644 --- 
a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct HasOperatorsOutline { int i; unsigned u; @@ -24,7 +24,7 @@ template<typename T> void acc_compute() { T someVar; T someVarArr[5]; -#pragma acc parallel reduction(+:someVar) +#pragma acc parallel reduction(+:someVar) // CHECK: acc.reduction.recipe @reduction_add__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -47,7 +47,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -55,8 +55,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; - -#pragma acc parallel reduction(*:someVar) +#pragma acc parallel reduction(*:someVar) // CHECK: acc.reduction.recipe @reduction_mul__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -78,17 +77,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { - ; -#pragma acc parallel reduction(max:someVar) // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): // CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasOperatorsOutline>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } -// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init { + ; +#pragma acc parallel reduction(max:someVar) +// CHECK: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init { // 
CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -118,7 +117,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(min:someVar) +#pragma acc parallel reduction(min:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -149,7 +148,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVar) +#pragma acc parallel reduction(&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -172,7 +171,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -180,8 +179,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init { +#pragma acc parallel reduction(|:someVar) +// CHECK: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -203,7 +202,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: 
!cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -211,8 +210,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init { +#pragma acc parallel reduction(^:someVar) +// CHECK: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -234,7 +233,7 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}): @@ -242,8 +241,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&&:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { +#pragma acc parallel reduction(&&:someVar) +// CHECK: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i> @@ -273,7 +272,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(||:someVar) +#pragma acc parallel reduction(||:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init] @@ -305,7 +304,7 @@ void acc_compute() { // CHECK-NEXT: } ; -#pragma acc parallel reduction(+:someVarArr) +#pragma acc parallel reduction(+:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_add__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -344,7 +343,29 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -368,7 +389,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(*:someVarArr) +#pragma acc parallel reduction(*:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -466,7 +487,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i 
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -490,7 +532,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(max:someVarArr) +#pragma acc parallel reduction(max:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -612,7 +654,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(min:someVarArr) +#pragma acc parallel reduction(min:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -734,7 +776,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArr) +#pragma acc parallel reduction(&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, 
["openacc.reduction.init", init] @@ -832,7 +874,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -856,7 +919,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArr) +#pragma acc parallel reduction(|:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -896,7 +959,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, 
!cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -920,7 +1004,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArr) +#pragma acc parallel reduction(^:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -959,7 +1043,28 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 
5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -982,8 +1087,9 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } + ; -#pragma acc parallel reduction(&&:someVarArr) +#pragma acc parallel reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] @@ -1105,9 +1211,8 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(||:someVarArr) - -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { +#pragma acc parallel reduction(||:someVarArr) +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"] @@ -1171,7 +1276,7 @@ void acc_compute() { // CHECK-NEXT: } ; -#pragma acc parallel reduction(+:someVarArr[2]) +#pragma acc parallel reduction(+:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1216,6 +1321,32 @@ void 
acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1248,7 +1379,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(*:someVarArr[2]) +#pragma acc parallel reduction(*:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1293,6 +1424,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1325,7 +1482,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(max:someVarArr[2]) +#pragma acc parallel reduction(max:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1402,7 +1559,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(min:someVarArr[2]) +#pragma acc parallel reduction(min:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe 
@reduction_min__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1479,7 +1636,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArr[2]) +#pragma acc parallel reduction(&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1524,6 +1681,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield 
%[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1556,7 +1739,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArr[2]) +#pragma acc parallel reduction(|:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1601,6 +1784,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: 
!cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1633,7 +1842,7 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArr[2]) +#pragma acc parallel reduction(^:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"] @@ -1678,6 +1887,32 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, 
%[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1710,7 +1945,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: }
 ;
-#pragma acc parallel reduction(&&:someVarArr[2])
+#pragma acc parallel reduction(&&:someVarArr[2])
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"]
@@ -1787,7 +2022,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: }
 ;
-#pragma acc parallel reduction(||:someVarArr[2])
+#pragma acc parallel reduction(||:someVarArr[2])
 // CHECK-NEXT: acc.reduction.recipe @reduction_lor__Bcnt1__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init"]
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c
index 783aa9a..b2d1362 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 
 void acc_compute() {
   unsigned int someVar;
@@ -13,7 +13,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i>
 // CHECK-NEXT: }
 ;
@@ -27,7 +30,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i>
 // CHECK-NEXT: }
 ;
@@ -66,7 +72,10 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i>
 // CHECK-NEXT: }
 ;
@@ -80,7 +89,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i>
 // CHECK-NEXT: }
 ;
@@ -94,7 +106,10 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!u32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!u32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!u32i>
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]]
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!u32i>
 // CHECK-NEXT: }
 ;
@@ -153,7 +168,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -184,7 +223,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -277,7 +340,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -307,7 +394,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -337,7 +448,31 @@ void acc_compute() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !s64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -435,6 +570,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -470,6 +634,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -505,6 +698,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -540,6 +734,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -575,6 +770,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -610,6 +834,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -645,6 +898,35 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!u32i>, !u64i) -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -680,6 +962,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
@@ -715,6 +998,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!u32i x 5>>
 // CHECK-NEXT: }
 ;
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp
index 038afcaa..349e0fb 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
 
 struct DefaultOperators {
   int i;
@@ -15,7 +15,7 @@
 };
 
 template<typename T>
-void acc_combined() {
+void acc_loop() {
   T someVar;
   T someVarArr[5];
   struct DefaultOperatorsNoFloats someVarNoFloats;
@@ -43,12 +43,43 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(*:someVar)
-
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init]
@@ -71,10 +102,42 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(max:someVar)
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -101,7 +164,7 @@
 // TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(min:someVar)
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -128,7 +191,7 @@
 // TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(&:someVarNoFloats)
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}})
@@ -146,7 +209,27 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>
 // CHECK-NEXT: }
   for(int i = 0; i < 5; ++i);
@@ -167,7 +250,27 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>
 // CHECK-NEXT: }
   for(int i = 0; i < 5; ++i);
@@ -188,10 +291,30 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(&&:someVar)
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -218,7 +341,7 @@
 // TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(||:someVar)
 // CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <lor> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}})
@@ -245,7 +368,7 @@
 // TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(+:someVarArr)
 // CHECK-NEXT: acc.reduction.recipe @reduction_add__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init {
@@ -286,10 +409,65 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(*:someVarArr)
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
@@ -388,11 +566,67 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[MUL]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(max:someVarArr)
+
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
@@ -493,7 +727,7 @@
 // TODO OpenACC: Expecting combination operation here
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(min:someVarArr)
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
@@ -664,10 +898,53 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
 // CHECK-NEXT: }
-  for(int i=0;i < 5; ++i);
+  for(int i = 0; i < 5; ++i);
 #pragma acc loop reduction(|:someVarArrNoFloats)
 // CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}})
@@ -702,7 +979,50 @@
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i
+// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool
+// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool>
+//
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
 // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>
 // CHECK-NEXT: }
   for(int i = 0; i < 5; ++i);
@@ -739,10 +1059,53 @@ void acc_combined() {
 //
 // CHECK-NEXT: } combiner {
 // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats>
+//
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT: %[[LHS_INT_CAST:.*]] =
cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) @@ -844,9 +1207,9 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVarArr) -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] @@ -888,7 +1251,7 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(+:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init { @@ -935,9 +1298,67 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = 
cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: 
%[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ADD]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(*:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -983,9 +1404,67 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride 
%[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.double>, !cir.double +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) nsw : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast 
int_to_bool %[[MUL]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(max:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1033,7 +1512,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(min:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_min__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1081,7 +1560,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1121,9 +1600,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: 
%[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[AND]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield 
%[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(|:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1163,9 +1688,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] 
: !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[OR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(^:someVarArrNoFloats[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1205,9 +1776,55 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : 
(!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!u32i>, !u32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i +// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[RHS_INT_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr<!cir.bool>, !cir.bool +// CHECK-NEXT: %[[LHS_INT_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_INT_CAST]], %[[RHS_INT_CAST]]) : !s32i +// CHECK-NEXT: %[[RES_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[XOR]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store {{.*}} %[[RES_TO_BOOL_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr<!cir.bool> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: 
!acc.data_bounds_ty{{.*}})) @@ -1255,7 +1872,7 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_lor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -1303,29 +1920,29 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(+:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(*:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(max:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(min:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&:someVarArrNoFloats[1:1]) for(int i = 0; i < 5; ++i); #pragma acc loop reduction(|:someVarArrNoFloats[1:1]) for(int i = 0; i < 5; ++i); #pragma acc loop reduction(^:someVarArrNoFloats[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[1:1]) - for(int i=0;i < 5; ++i); + for(int i = 0; i < 5; ++i); #pragma acc loop reduction(||:someVarArr[1:1]) - for(int i=0;i < 5; ++i); - // CHECK-NEXT: cir.func {{.*}}@_Z12acc_combined + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: cir.func {{.*}}@_Z8acc_loop } void uses() { - acc_combined<DefaultOperators>(); + acc_loop<DefaultOperators>(); } diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp index 11b7c35..8d9269b 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s template<typename T> void acc_loop() { @@ -14,7 +14,10 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -27,7 +30,10 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]] // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } @@ -112,7 +118,31 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -143,7 +173,31 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, 
!s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -303,6 +357,35 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} 
%[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -338,6 +421,35 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]] +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -373,6 +485,7 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: 
acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -408,6 +521,7 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -443,6 +557,7 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -478,6 +593,7 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp index d95da8c..1c89515 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct HasOperatorsInline { int i; @@ -48,7 +48,7 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -79,7 +79,7 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -88,7 +88,7 @@ void acc_loop() 
{ // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(max:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init { +// CHECK: acc.reduction.recipe @reduction_max__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <max> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -172,7 +172,7 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -181,7 +181,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(|:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init { +// CHECK: acc.reduction.recipe @reduction_ior__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <ior> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i> @@ -203,7 +203,7 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}): @@ -212,7 +212,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(^:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init { +// CHECK: acc.reduction.recipe @reduction_xor__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <xor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> 
@@ -234,7 +234,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsInline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline> {{.*}}):
@@ -243,7 +243,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(&&:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
+// CHECK: acc.reduction.recipe @reduction_land__ZTS18HasOperatorsInline : !cir.ptr<!rec_HasOperatorsInline> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsInline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr<!rec_HasOperatorsInline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
@@ -344,7 +344,29 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -466,7 +488,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -832,7 +875,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -896,7 +960,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -959,7 +1044,28 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !s64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):
@@ -1216,6 +1322,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinepLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1293,6 +1425,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlinemLERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1524,6 +1682,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaNERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1601,6 +1785,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineoRERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
@@ -1678,6 +1888,32 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsInline>, !u64i) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineeOERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!rec_HasOperatorsInline>) -> !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
index d207e07b..72e9d1f 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
template<typename T>
void acc_loop() {
@@ -14,7 +14,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -28,7 +31,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -67,7 +73,10 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -81,7 +90,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -95,7 +107,10 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!s32i> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!s32i> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr<!s32i>
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHSARG]]
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!s32i>
// CHECK-NEXT: }
@@ -155,7 +170,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -186,7 +225,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -279,7 +342,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -309,7 +396,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -339,7 +450,31 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -437,6 +572,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ADD]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -472,6 +636,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[MUL]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -507,6 +700,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -542,6 +736,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -577,6 +772,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[AND:.*]] = cir.binop(and, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[AND]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -612,6 +836,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[OR:.*]] = cir.binop(or, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[OR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -647,6 +900,35 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!s32i>, !u64i) -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: %[[XOR:.*]] = cir.binop(xor, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[XOR]], %[[LHS_STRIDE]]
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -682,6 +964,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
@@ -717,6 +1000,7 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// TODO OpenACC: Expecting combination operation here
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!s32i x 5>>
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
index a33c25a..a36d41c1 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct HasOperatorsOutline {
int i;
unsigned u;
@@ -47,7 +47,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -55,7 +55,6 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
-
#pragma acc loop reduction(*:someVar)
// CHECK: acc.reduction.recipe @reduction_mul__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <mul> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
@@ -78,17 +77,17 @@ void acc_loop() {
// CHECK-NEXT: acc.yield
//
// CHECK-NEXT: } combiner {
- for(int i=0;i < 5; ++i);
-#pragma acc loop reduction(max:someVar)
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasOperatorsOutline>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
-// CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init {
+ for(int i=0;i < 5; ++i);
+#pragma acc loop reduction(max:someVar)
+// CHECK: acc.reduction.recipe @reduction_max__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <max> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -172,7 +171,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -181,7 +180,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(|:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init {
+// CHECK: acc.reduction.recipe @reduction_ior__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <ior> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -203,7 +202,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -212,7 +211,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(^:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init {
+// CHECK: acc.reduction.recipe @reduction_xor__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <xor> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -234,7 +233,7 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_HasOperatorsOutline>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}, %[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline> {{.*}}):
@@ -243,7 +242,7 @@ void acc_loop() {
// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc loop reduction(&&:someVar)
-// CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init {
+// CHECK: acc.reduction.recipe @reduction_land__ZTS19HasOperatorsOutline : !cir.ptr<!rec_HasOperatorsOutline> reduction_operator <land> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasOperatorsOutline>{{.*}})
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr<!rec_HasOperatorsOutline>, ["openacc.reduction.init", init]
// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
@@ -344,7 +343,29 @@ void acc_loop() {
//
// CHECK-NEXT: } combiner {
// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}})
-// TODO OpenACC: Expecting combination operation here
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -466,7 +487,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] 
: (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -832,7 +874,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -896,7 +959,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting 
combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -959,7 +1043,28 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> 
!cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !s64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!s64i>, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr<!s64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}): @@ -982,6 +1087,7 @@ void acc_loop() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init { @@ -1106,8 +1212,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(||:someVarArr) - -// CHECK-NEXT: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { +// CHECK: acc.reduction.recipe @reduction_lor__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <lor> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init] // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"] @@ -1216,6 +1321,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> 
-> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZpLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1293,6 +1424,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZmLR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// 
CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1524,6 +1681,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZaNR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1601,6 +1784,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, 
%[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.call @_ZoRR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1678,6 +1887,32 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i 
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_HasOperatorsOutline>, !u64i) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.call @_ZeOR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!rec_HasOperatorsOutline>) -> !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp b/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp
index fc696ff..20ad7a3 100644
--- a/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
// Note: unlike the 'private' recipe checks, this is just for spot-checking,
// so this test isn't as comprehensive.
The same code paths are used for @@ -90,7 +90,88 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[LHS_TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, !u64i) -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: %[[RHS_TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, !u64i) -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// 
CHECK-NEXT: %[[LHS_BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHS_BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.array<!rec_NoOps x 5>>, !u64i) -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHS_BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.array<!rec_NoOps x 5>>, !u64i) -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHS_BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_BOUND1_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND2_STRIDE_DECAY]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHS_BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_BOUND1_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND2_STRIDE_DECAY]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_GET_I:.*]] = cir.get_member %[[LHS_BOUND1_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_GET_I:.*]] = cir.get_member %[[RHS_BOUND1_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OP:.*]] = cir.binop(add, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw +// CHECK-NEXT: cir.store{{.*}} %[[OP]], %[[LHS_GET_I]] : !s32i +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// 
CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT:} destroy { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): @@ -355,7 +436,89 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> + +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_TLA_LOAD:.*]] = cir.load %[[LHSARG]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[LHS_TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !u64i) -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[RHS_TLA_LOAD:.*]] = cir.load %[[RHSARG]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !u64i) -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: 
%[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND3_STRIDE_LOAD:.*]] = cir.load %[[LHS_BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.ptr<!rec_NoOps>>, !u64i) -> !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[RHS_BOUND3_STRIDE_LOAD:.*]] = cir.load %[[RHS_BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr<!cir.ptr<!rec_NoOps>>, !u64i) -> !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[LHS_BOUND2_STRIDE_LOAD:.*]] = cir.load %[[LHS_BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_BOUND2_STRIDE_LOAD]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_BOUND2_STRIDE_LOAD:.*]] = cir.load %[[RHS_BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND2_STRIDE_LOAD]], %[[ITR1_LOAD]] : (!cir.ptr<!rec_NoOps>, !u64i) -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[LHS_GET_I:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_GET_I:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr<!rec_NoOps> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_GET_I]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: %[[OP:.*]] = cir.binop(mul, %[[LHS_LOAD]], %[[RHS_LOAD]]) nsw +// CHECK-NEXT: cir.store{{.*}} %[[OP]], %[[LHS_GET_I]] : !s32i +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: 
cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
diff --git a/clang/test/CIR/IR/atomic.cir b/clang/test/CIR/IR/atomic.cir
index 8520763..790297f 100644
--- a/clang/test/CIR/IR/atomic.cir
+++ b/clang/test/CIR/IR/atomic.cir
@@ -5,17 +5,30 @@
cir.func @atomic_xchg(%ptr: !cir.ptr<!s32i>, %val: !s32i) {
// CHECK-LABEL: @atomic_xchg
- %0 = cir.atomic.xchg relaxed %ptr, %val : !cir.ptr<!s32i> -> !s32i
- // CHECK: cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i
- %1 = cir.atomic.xchg consume %ptr, %val : !cir.ptr<!s32i> -> !s32i
- // CHECK: cir.atomic.xchg consume %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i
- %2 = cir.atomic.xchg acquire %ptr, %val : !cir.ptr<!s32i> -> !s32i
- // CHECK: cir.atomic.xchg acquire %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i
- %3 = cir.atomic.xchg release %ptr, %val : !cir.ptr<!s32i> -> !s32i
- // CHECK: cir.atomic.xchg release %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i
- %4 = cir.atomic.xchg acq_rel %ptr, %val : !cir.ptr<!s32i> -> !s32i
- // CHECK: cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i
- %5 = cir.atomic.xchg seq_cst %ptr, %val : !cir.ptr<!s32i> -> !s32i
- // CHECK: cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : !cir.ptr<!s32i> -> !s32i
+ %0 = cir.atomic.xchg relaxed %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CHECK: cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ %1 = cir.atomic.xchg consume %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CHECK: cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ %2 = cir.atomic.xchg acquire %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CHECK: cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ %3 = cir.atomic.xchg release %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CHECK: cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ %4 = cir.atomic.xchg acq_rel %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CHECK: cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ %5 = cir.atomic.xchg seq_cst %ptr, %val : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CHECK: cir.atomic.xchg seq_cst %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ cir.return
+}
+
+cir.func @atomic_cmpxchg(%ptr: !cir.ptr<!s32i>, %expected: !s32i, %desired: !s32i) {
+ // CHECK-LABEL: @atomic_cmpxchg
+ %0, %1 = cir.atomic.cmpxchg success(relaxed) failure(relaxed) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool)
+ // CHECK:
cir.atomic.cmpxchg success(relaxed) failure(relaxed) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + %2, %3 = cir.atomic.cmpxchg weak success(relaxed) failure(relaxed) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg weak success(relaxed) failure(relaxed) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + %4, %5 = cir.atomic.cmpxchg success(seq_cst) failure(acquire) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + %6, %7 = cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %ptr, %expected, %desired : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) + // CHECK: cir.atomic.cmpxchg weak success(seq_cst) failure(acquire) %{{.+}}, %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i, !s32i) -> (!s32i, !cir.bool) cir.return } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 8f3d459..bcffd861 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1100,6 +1100,7 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hadd_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_hadd_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +3.0, +11.0, +7.0, +15.0)); __m256 test_mm256_hadd_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_hadd_ps @@ -1107,17 +1108,27 @@ __m256 test_mm256_hadd_ps(__m256 A, __m256 B) { return _mm256_hadd_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_hadd_ps( + (__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, + (__m256){+9.0f, +10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f}), + +3.0f, +7.0f, +19.0f, +23.0f, +11.0f, +15.0f, +27.0f, +31.0f)); + __m256d test_mm256_hsub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_hsub_pd // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){+1.0, +2.0, +4.0, +3.0}, (__m256d){+10.0, +6.0, +16.0, +8.0}), -1.0,+4.0,+1.0,+8.0)); __m256 test_mm256_hsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_hsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_hsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_hsub_ps( + (__m256){1.0f, 2.0f, 4.0f, 3.0f, 5.0f, 7.0f, 7.0f, 5.0f}, + (__m256){6.0f, 9.0f, 11.0f, 8.0f, 13.0f, 17.0f, 15.0f, 11.0f}), + -1.0f, 1.0f, -3.0f, 3.0f, -2.0f, 2.0f, -4.0f, 4.0f)); __m256i test_mm256_insert_epi8(__m256i x, char b) { // CHECK-LABEL: test_mm256_insert_epi8 diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 55f18f9..dc64f96 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -485,36 +485,60 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) { // CHECK: call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hadd_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_hadd_epi16( + (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, + (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}), + 
3,7,11,15,35,39,43,47,19,23,27,31,51,55,59,63)); __m256i test_mm256_hadd_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hadd_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_hadd_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32( + (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80}, + (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}), + 30,70,20,60,110,150,100,140)); __m256i test_mm256_hadds_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hadds_epi16 // CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hadds_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi( _mm256_hadds_epi16( + (__m256i)(__v16hi){32767, 32767, 1,2,3,4,5,6,7,8,9,10,11,12,13,14}, + (__m256i)(__v16hi){19,20,21,22,23,24,25,26,27,28,29,30,31,32, 32767, 5}), + 32767, 3,7,11, 39,43,47,51,15,19,23,27, 55,59,63, 32767)); __m256i test_mm256_hsub_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hsub_epi16 // CHECK: call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hsub_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_hsub_epi16( + (__m256i)(__v16hi){2,1,1,2,5,3,3,5,7,4,4,7,9,5,5,9}, + (__m256i)(__v16hi){10,5,5,10,12,6,6,12,21,14,14,21,24,16,16,24}), + 1,-1,2,-2,5,-5,6,-6,3,-3,4,-4, 7,-7,8,-8)); __m256i test_mm256_hsub_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hsub_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_hsub_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_hsub_epi32( + (__m256i)(__v8si){10, 20, 30,50,60,90,100,140}, + (__m256i)(__v8si){200,150,260,200,420,350,800,720}), + -10,-20,50,60, -30,-40, 70,80)); __m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_hsubs_epi16 // CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_hsubs_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_hsubs_epi16( + (__m256i)(__v16hi){32726, -100, 3, 2, 6, 4, 8, 5,15,10 ,21, 14, 27, 18, 100, 90}, + (__m256i)(__v16hi){40, 20, 100, 70, 200,150, 100,40, 1000,900,300,150, 500,300, 1, 1}), + 32767, 1, 2, 3, 20, 30, 50, 60, 5, 7, 9, 10, 100, 150, 200, 0)); __m128i test_mm_i32gather_epi32(int const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_epi32 diff --git a/clang/test/CodeGen/X86/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c index eebefb0..f90697e 100644 --- a/clang/test/CodeGen/X86/avx512ifma-builtins.c +++ b/clang/test/CodeGen/X86/avx512ifma-builtins.c @@ -8,45 +8,230 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s - #include <immintrin.h> +#include "builtin_test_helpers.h" __m512i test_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_madd52hi_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - return _mm512_madd52hi_epu64(__X, __Y, __Z); + return _mm512_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64( + (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){10, 
0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}), + 100, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}), + 0xFFFFFFFFFFFFEull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64( + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull}), + 4503599627370495ull, 4503599627370496ull, + 4503599627370497ull, 4503599627370498ull, + 4503599627370499ull, 4503599627370500ull, + 4503599627370501ull, 4503599627370502ull)); + __m512i test_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_madd52hi_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y); + return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64( + (__m512i)(__v8du){111, 222, 333, 444, 555, 666, + 777, 888}, + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}), + 111, 222, 333, 444, 555, 666, 777, 888)); + +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64( + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + 0xFF, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}), + 10, 20, 30, 40, 50, 60, 70, 80)); + __m512i test_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_maskz_madd52hi_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z); + return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64( + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}), + 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64( + 0xFF, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}), + 1, 2, 3, 4, 5, 6, 7, 8)); + __m512i test_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_madd52lo_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - return _mm512_madd52lo_epu64(__X, __Y, __Z); + return _mm512_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}), + 50, 0, 0, 0, 0, 0, 0, 
0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){20, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){30, 0, 0, 0, 0, 0, 0, 0}), + 700, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){1, 0, 0, 0, 0, 0, 0, 0}), + 0xFFFFFFFFFFFFFull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0x1F000000000000ull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){2, 0, 0, 0, 0, 0, 0, 0}), + 0xE000000000000ull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}), + 21, 62, 123, 204, 305, 426, 567, 728)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0, + 0, 0, 0, 0}, + (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}), + 4503599627370545ull, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}), + 210, 620, 1230, 2040, 3050, 4260, 5670, 7280)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0x1F000000000000ull, + 0x1F000000000000ull, 0, 0, 0, + 0, 0, 0}, + (__m512i)(__v8du){2, 3, 0, 0, 0, 0, 0, 0}), + 0xE000000000000ull, 0xD000000000000ull, 0, 0, 0, 0, + 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64( + (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0}, + (__m512i)(__v8du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull}, + (__m512i)(__v8du){1, 1, 1, 1, 1, 1, 1, 1}), + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull)); + __m512i test_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_madd52lo_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y); + return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64( + (__m512i)(__v8du){111, 222, 333, 444, 555, 666, + 777, 888}, + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}), + 111, 222, 333, 444, 555, 666, 777, 888)); + +TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64( + (__m512i)(__v8du){1000, 2000, 3000, 4000, 5000, + 6000, 7000, 8000}, + 0xFF, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80, + 90}), + 3000, 8000, 15000, 24000, 35000, 48000, 63000, + 80000)); + __m512i test_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { // CHECK-LABEL: test_mm512_maskz_madd52lo_epu64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> 
%{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z); + return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z); } + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64( + 0x00, + (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8}, + (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70, + 80}, + (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}), + 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64( + 0xFF, + (__m512i)(__v8du){100, 200, 300, 400, 500, 600, + 700, 800}, + (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80, + 90}, + (__m512i)(__v8du){30, 40, 50, 60, 70, 80, 90, + 100}), + 700, 1400, 2300, 3400, 4700, 6200, 7900, 9800)); diff --git a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c index 89108fc..1cbb580 100644 --- a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c +++ b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c @@ -8,85 +8,241 @@ // RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s // RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s - #include <immintrin.h> +#include "builtin_test_helpers.h" __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52hi_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - return _mm_madd52hi_epu64(__X, __Y, __Z); + return _mm_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 100, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})), + 0xFFFFFFFFFFFFEull, 0)); + __m128i test_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_madd52hi_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y); + return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){111, 222}), + 0x0, + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20})), + 111, 222)); + +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){10, 20}), + 0x2, + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}), + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL})), + 10, 0x100000000014ULL)); + __m128i test_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_maskz_madd52hi_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z); + return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z); } 
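For cross-checking the IFMA expectations in these tests, a scalar reference model helps; the sketch below is a review aid only, not part of the patch. The helper name madd52hi_ref is illustrative (not a Clang API), and it assumes a compiler providing unsigned __int128 (Clang/GCC on 64-bit hosts). Each result lane is acc plus the high 52 bits of the 104-bit product of the low 52 bits of the two multiplicands; the mask forms keep, and the maskz forms zero, lanes whose mask bit is clear.

#include <stdint.h>

/* Hypothetical per-lane reference for VPMADD52H (not a Clang API). */
static inline uint64_t madd52hi_ref(uint64_t acc, uint64_t y, uint64_t z) {
  const uint64_t lo52 = (1ULL << 52) - 1;            /* 2^52 - 1 */
  unsigned __int128 p = (unsigned __int128)(y & lo52) * (z & lo52);
  return acc + (uint64_t)(p >> 52);                  /* high 52 bits of product */
}

For example, madd52hi_ref(0, 0xFFFFFFFFFFFFF, 0xFFFFFFFFFFFFF) yields 0xFFFFFFFFFFFFE, matching the maximum-operand expectations in these tests.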
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x3, + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){100, 200})), + 1, 2)); + +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x1, + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}), + (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}), + (__m128i)((__v2du){0, 0})), + 0x1000000000000ULL, 0)); + __m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) - return _mm256_madd52hi_epu64(__X, __Y, __Z); + return _mm256_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 6, 7, 8})), + 100, 200, 300, 400)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0})), + 0xFFFFFFFFFFFFEull, 0, 0, 0)); + __m256i test_mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y); + return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){111, 222, 333, 444}), + 0x0, + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40})), + 111, 222, 333, 444)); + +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){10, 20, 30, 40}), + 0xA, + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL}), + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL})), + 10, 0x100000000014ULL, 30, 0x100000000028ULL)); + __m256i test_mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_maskz_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z); + return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0xF, + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){100, 200, 300, 400})), + 1, 2, 3, 4)); + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0x5, + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL}), + (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL, + 0x1000000000000ULL, 0x1000000000000ULL}), + (__m256i)((__v4du){0, 0, 0, 0})), + 0x1000000000000ULL, 0, 0x1000000000000ULL, 0)); + __m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - return _mm_madd52lo_epu64(__X, __Y, __Z); + return _mm_madd52lo_epu64(__X, __Y, __Z); } 
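The low-half form differs only in which 52 bits of the product are added; the same caveats apply (review-only sketch, illustrative name, assumes unsigned __int128):

#include <stdint.h>

/* Hypothetical per-lane reference for VPMADD52L (not a Clang API). */
static inline uint64_t madd52lo_ref(uint64_t acc, uint64_t y, uint64_t z) {
  const uint64_t lo52 = (1ULL << 52) - 1;
  unsigned __int128 p = (unsigned __int128)(y & lo52) * (z & lo52);
  return acc + ((uint64_t)p & lo52);                 /* low 52 bits of product */
}

This also explains the wrap cases below: in madd52lo_ref(0, 0x1F000000000000, 2), y first truncates to 0xF000000000000, and (0xF000000000000 * 2) & (2^52 - 1) is 0xE000000000000, the expected lane value.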
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 50, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){20, 0}), + (__m128i)((__v2du){30, 0})), + 700, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){2, 3})), + 21, 62)); + __m128i test_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y); + return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){1000, 2000}), + 0x3, + (__m128i)((__v2du){100, 200}), + (__m128i)((__v2du){20, 30})), + 3000, 8000)); + +TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){111, 222}), + 0x0, + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20})), + 111, 222)); + __m128i test_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_maskz_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z); + return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x3, + (__m128i)((__v2du){100, 200}), + (__m128i)((__v2du){20, 30}), + (__m128i)((__v2du){30, 40})), + 700, 1400)); + +TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x1, + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){20, 0}), + (__m128i)((__v2du){30, 0})), + 700, 0)); + __m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) - return _mm256_madd52lo_epu64(__X, __Y, __Z); + return _mm256_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + __m256i test_mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y); + return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){1000, 2000, 3000, 4000}), + 0xF, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){20, 30, 40, 50})), + 3000, 8000, 15000, 24000)); + +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){111, 222, 333, 444}), + 0x0, + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40})), + 111, 222, 333, 444)); + +TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){11, 22, 33, 44}), + 0x5, + (__m256i)((__v4du){100, 200, 300, 400}), + 
(__m256i)((__v4du){10, 20, 30, 40})), + 1011, 22, 9033, 44)); + __m256i test_mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_maskz_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z); + return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z); } + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0xF, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){20, 30, 40, 50}), + (__m256i)((__v4du){30, 40, 50, 60})), + 700, 1400, 2300, 3400)); + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0x9, + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 10, 15, 20})), + 150, 0, 0, 1200)); diff --git a/clang/test/CodeGen/X86/avxifma-builtins.c b/clang/test/CodeGen/X86/avxifma-builtins.c index aa15159..70531da 100644 --- a/clang/test/CodeGen/X86/avxifma-builtins.c +++ b/clang/test/CodeGen/X86/avxifma-builtins.c @@ -8,8 +8,9 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s - #include <immintrin.h> +#include "builtin_test_helpers.h" + __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52hi_epu64 @@ -17,44 +18,207 @@ __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { return _mm_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){50, 100}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){5, 6})), + 50, 100)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})), + 0xFFFFFFFFFFFFEull, 0)); + __m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52hi_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52hi_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 6, 7, 8})), + 100, 200, 300, 400)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0})), + 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0)); + __m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) { // CHECK-LABEL: test_mm_madd52lo_epu64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 50, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){2, 3})), + 21, 62)); + 
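For the small-operand vectors in these cases, the 52-bit truncations are no-ops and each expected lane is simply acc + y * z; a hypothetical compile-time spot-check, not part of the patch:

/* Lane arithmetic for the {1,2} x {10,20} x {2,3} case above (C11). */
_Static_assert(1 + 10 * 2 == 21, "lane 0: acc + y*z");
_Static_assert(2 + 20 * 3 == 62, "lane 1: acc + y*z");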
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){1, 0})), + 0xFFFFFFFFFFFFFull, 0)); + __m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) { // CHECK-LABEL: test_mm256_madd52lo_epu64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52lo_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){1, 0, 0, 0})), + 0xFFFFFFFFFFFFFull, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0x1F000000000000ull, 0, 0, + 0}), + (__m256i)((__v4du){2, 0, 0, 0})), + 0xE000000000000ull, 0, 0, 0)); + __m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { -// CHECK-LABEL: test_mm_madd52hi_avx_epu64 -// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK-LABEL: test_mm_madd52hi_avx_epu64 + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_madd52hi_avx_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64( + (__m128i)((__v2du){50, 100}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){5, 6})), + 50, 100)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 100, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})), + 0xFFFFFFFFFFFFEull, 0)); + __m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { -// CHECK-LABEL: test_mm256_madd52hi_avx_epu64 -// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK-LABEL: test_mm256_madd52hi_avx_epu64 + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52hi_avx_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFull, 0, 0})), + 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64( + (__m256i)((__v4du){100, 200, 300, 400}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){5, 6, 7, 8})), + 100, 200, 300, 400)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0})), + 0xFFFFFFFFFFFFEull, 0, 0, 0)); + __m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { -// CHECK-LABEL: test_mm_madd52lo_avx_epu64 -// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK-LABEL: test_mm_madd52lo_avx_epu64 + // CHECK: call {{.*}}<2 x 
i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_madd52lo_avx_epu64(__X, __Y, __Z); } +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){10, 0}), + (__m128i)((__v2du){5, 0})), + 50, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){100, 0}), + (__m128i)((__v2du){20, 0}), + (__m128i)((__v2du){30, 0})), + 700, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){1, 2}), + (__m128i)((__v2du){10, 20}), + (__m128i)((__v2du){2, 3})), + 21, 62)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){0, 0}), + (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}), + (__m128i)((__v2du){1, 0})), + 0xFFFFFFFFFFFFFull, 0)); + __m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { -// CHECK-LABEL: test_mm256_madd52lo_avx_epu64 -// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK-LABEL: test_mm256_madd52lo_avx_epu64 + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_madd52lo_avx_epu64(__X, __Y, __Z); } + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){1, 2, 3, 4}), + (__m256i)((__v4du){10, 20, 30, 40}), + (__m256i)((__v4du){2, 3, 4, 5})), + 21, 62, 123, 204)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0, + 0}), + (__m256i)((__v4du){1, 0, 0, 0})), + 0xFFFFFFFFFFFFFull, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64( + (__m256i)((__v4du){0, 0, 0, 0}), + (__m256i)((__v4du){0x1F000000000000ull, 0, 0, + 0}), + (__m256i)((__v4du){2, 0, 0, 0})), + 0xE000000000000ull, 0, 0, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64( + (__m128i)((__v2du){5, 10}), + (__m128i)((__v2du){100, 200}), + (__m128i)((__v2du){7, 8})), + 705, 1610)); + diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 2b45b92..a1e05a1 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -312,36 +312,42 @@ __m64 test_mm_hadd_pi16(__m64 a, __m64 b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128( return _mm_hadd_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hadd_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),3,7,11,15)); __m64 test_mm_hadd_pi32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hadd_pi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128( return _mm_hadd_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_hadd_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),3,7)); __m64 test_mm_hadds_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hadds_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128( return _mm_hadds_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 1,3},(__m64)(__v4hi){-1,3, 40, 60}),32767, 4, 2,100)); __m64 test_mm_hsub_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hsub_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128( return _mm_hsub_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,4,3},(__m64)(__v4hi){10,5,0,-10}),-1,1,5,10)); __m64 test_mm_hsub_pi32(__m64 a, 
__m64 b) { // CHECK-LABEL: test_mm_hsub_pi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128( return _mm_hsub_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){4,3}),-1,1)); __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_hsubs_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128( return _mm_hsubs_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, 32767, 5, -32767},(__m64)(__v4hi){4,5,10,5}),0,32767,-1,5)); __m64 test_mm_insert_pi16(__m64 a, int d) { // CHECK-LABEL: test_mm_insert_pi16 diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index c53afc5..a82dd40 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -31,24 +31,28 @@ __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_hadd_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_hadd_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +3.0, +7.0)); __m128 test_mm_hadd_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_hadd_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_hadd_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_hadd_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f,+6.0f,+7.0f,+8.0f}), +3.0f, +7.0f, +11.0f, +15.0f)); __m128d test_mm_hsub_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hsub_pd // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_hsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_hsub_pd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +3.0}), -1.0, +1.0)); __m128 test_mm_hsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_hsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_hsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_hsub_ps((__m128){+1.0f, +2.0f, +4.0f, +3.0f}, (__m128){+5.0f,+7.0f,+10.0f,+8.0f}), -1.0f, +1.0f, -2.0f, +2.0f)); __m128i test_mm_lddqu_si128(__m128i const* P) { // CHECK-LABEL: test_mm_lddqu_si128 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index 5885768..e623599 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -60,36 +60,43 @@ __m128i test_mm_hadd_epi16(__m128i a, __m128i b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hadd_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hadd_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){17,18,19,20,21,22,23,24}), 3,7,11,15,35,39,43,47)); __m128i test_mm_hadd_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hadd_epi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_hadd_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_hadd_epi32((__m128i)(__v4si){1,2,3,4}, (__m128i)(__v4si){5,6,7,8}), 3,7,11,15)); __m128i test_mm_hadds_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hadds_epi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hadds_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,-1,2,-3,3,1,4}, (__m128i)(__v8hi){2,6,1,9,-4,16,7,8}), 32767, 1,0,5,8,10,12,15)); + __m128i test_mm_hsub_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hsub_epi16 // CHECK: call <8 x i16> 
@llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hsub_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){20,15,16,12,9,6,4,2}, (__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 5,4,3,2,1,0,-1,-2)); __m128i test_mm_hsub_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hsub_epi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_hsub_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,1,1}, (__m128i)(__v4si){7,5,10,5}), 1,0,2,5)); __m128i test_mm_hsubs_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_hsubs_epi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_hsubs_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, -15,16,12,9,6,4,2},(__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 32767,4,3,2,1,0,-1,-2)); __m128i test_mm_maddubs_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_maddubs_epi16 diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c new file mode 100644 index 0000000..041797b --- /dev/null +++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c @@ -0,0 +1,57 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -O2 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: define dso_local signext range(i32 0, 4) i32 @test( +// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]] +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4 +// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]]) +// CHECK-NEXT: ret i32 [[ASMRESULT1]] +// +int test(int x) { + int cc; + asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc)); + return cc; +} + +// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_low_high_transformation( +// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]] +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4 +// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[ASMRESULT1]], -1 +// CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 2 +// CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[LOR_EXT]] +// +int test_low_high_transformation(int x) { + int cc; + asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc)); + return cc == 1 || cc == 2; +} + +// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_equal_high_transformation( +// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]] +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4 +// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]]) +// CHECK-NEXT: 
[[TMP2:%.*]] = and i32 [[ASMRESULT1]], 1 +// CHECK-NEXT: [[LOR_EXT:%.*]] = xor i32 [[TMP2]], 1 +// CHECK-NEXT: ret i32 [[LOR_EXT]] +// +int test_equal_high_transformation(int x) { + int cc; + asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc)); + return cc == 0 || cc == 2; +} +//. +// CHECK: [[META2]] = !{i64 788} +// CHECK: [[META3]] = !{i64 1670} +// CHECK: [[META4]] = !{i64 2505} +//. diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl deleted file mode 100644 index f48521b..0000000 --- a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl +++ /dev/null @@ -1,70 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL -// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPIRV - -// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", i16, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i16, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.2" = type { target("dx.TypedBuffer", i32, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.3" = type { target("dx.TypedBuffer", i64, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.4" = type { target("dx.TypedBuffer", i64, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.5" = type { target("dx.TypedBuffer", half, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.6" = type { target("dx.TypedBuffer", float, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.7" = type { target("dx.TypedBuffer", double, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.8" = type { target("dx.TypedBuffer", <4 x i16>, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.9" = type { target("dx.TypedBuffer", <3 x i32>, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.10" = type { target("dx.TypedBuffer", <2 x half>, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.11" = type { target("dx.TypedBuffer", <3 x float>, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.12" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 1) } - -// SPIRV: %"class.hlsl::RWBuffer" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) } -// SPIRV: %"class.hlsl::RWBuffer.2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) } -// SPIRV: %"class.hlsl::RWBuffer.3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) } -// SPIRV: %"class.hlsl::RWBuffer.4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) } -// SPIRV: %"class.hlsl::RWBuffer.5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) } -// SPIRV: %"class.hlsl::RWBuffer.7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.12" = type { target("spirv.SignedImage", i32, 5, 2, 
0, 0, 2, 21) } - -RWBuffer<int16_t> BufI16; -RWBuffer<uint16_t> BufU16; -RWBuffer<int> BufI32; -RWBuffer<uint> BufU32; -RWBuffer<int64_t> BufI64; -RWBuffer<uint64_t> BufU64; -RWBuffer<half> BufF16; -RWBuffer<float> BufF32; -RWBuffer<double> BufF64; -RWBuffer< vector<int16_t, 4> > BufI16x4; -RWBuffer< vector<uint, 3> > BufU32x3; -RWBuffer<half2> BufF16x2; -RWBuffer<float3> BufF32x3; -RWBuffer<int4> BufI32x4; -// TODO: RWBuffer<snorm half> BufSNormF16; -> 11 -// TODO: RWBuffer<unorm half> BufUNormF16; -> 12 -// TODO: RWBuffer<snorm float> BufSNormF32; -> 13 -// TODO: RWBuffer<unorm float> BufUNormF32; -> 14 -// TODO: RWBuffer<snorm double> BufSNormF64; -> 15 -// TODO: RWBuffer<unorm double> BufUNormF64; -> 16 - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { - BufI16[GI] = 0; - BufU16[GI] = 0; - BufI32[GI] = 0; - BufU32[GI] = 0; - BufI64[GI] = 0; - BufU64[GI] = 0; - BufF16[GI] = 0; - BufF32[GI] = 0; - BufF64[GI] = 0; - BufI16x4[GI] = 0; - BufU32x3[GI] = 0; - BufF16x2[GI] = 0; - BufF32x3[GI] = 0; -} diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl deleted file mode 100644 index 0de171c..0000000 --- a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXC,CHECK -// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK - -RWBuffer<int> In; -RWBuffer<int> Out; - -[numthreads(1,1,1)] -void main(unsigned GI : SV_GroupIndex) { - // CHECK: define void @main() - - // DXC: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] - // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] - Out[GI] = In[GI]; - - // DXC: %[[INPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[INPTR:.*]] = call ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] - // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: 
store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] - Out[GI] = In.Load(GI); -} diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl index ca33c42..1ec9f0f 100644 --- a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | \ -// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL // FIXME: SPIR-V codegen of llvm.spv.resource.handlefrombinding and resource types is not yet implemented // RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | \ // llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV @@ -14,7 +14,7 @@ RWBuffer<float> Buf1 : register(u5, space3); // Resource with implicit binding -RWBuffer<double> Buf2; +Buffer<double> Buf2; export void foo() { // Local resource declaration @@ -22,12 +22,12 @@ export void foo() { } // CHECK: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", float, 1, 0, 0) } -// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", double, 1, 0, 0) } -// CHECK: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) } +// CHECK: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", double, 0, 0, 0) } +// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i32, 1, 0, 1) } // CHECK: @Buf1 = internal global %"class.hlsl::RWBuffer" poison, align 4 // CHECK: @[[Buf1Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf1\00", align 1 -// CHECK: @Buf2 = internal global %"class.hlsl::RWBuffer.0" poison, align 4 +// CHECK: @Buf2 = internal global %"class.hlsl::Buffer" poison, align 4 // CHECK: @[[Buf2Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf2\00", align 1 // Buf1 initialization part 1 - global init function that calls RWBuffer<float>::__createFromBinding @@ -50,24 +50,24 @@ export void foo() { // Buf2 initialization part 1 - global init function that RWBuffer<float>::__createFromImplicitBinding // CHECK: define internal void @__cxx_global_var_init.1() // CHECK-NEXT: entry: -// CHECK-NEXT: call void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-NEXT: call void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) // CHECK-SAME: (ptr {{.*}} @Buf2, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 0, ptr noundef @[[Buf2Str]]) -// Buf2 initialization part 2 - body of RWBuffer<float>::__createFromImplicitBinding call -// CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) -// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer.0") align 4 %[[RetValue2:.*]], i32 noundef %orderId, +// Buf2 initialization part 2 - body of Buffer<double>::__createFromImplicitBinding call +// CHECK: define linkonce_odr hidden void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::Buffer") align 4 %[[RetValue2:.*]], i32 noundef %orderId, // CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, ptr noundef %name) -// CHECK: %[[Tmp2:.*]] = alloca 
%"class.hlsl::RWBuffer.0", align 4 -// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 1, 0, 0) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_1_0_0t( -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %[[Tmp2]], i32 0, i32 0 -// CHECK-DXIL: store target("dx.TypedBuffer", double, 1, 0, 0) %[[Handle2]], ptr %__handle, align 4 -// CHECK: call void @hlsl::RWBuffer<double>::RWBuffer(hlsl::RWBuffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]]) +// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::Buffer", align 4 +// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 0, 0, 0) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_0_0_0t( +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %[[Tmp2]], i32 0, i32 0 +// CHECK-DXIL: store target("dx.TypedBuffer", double, 0, 0, 0) %[[Handle2]], ptr %__handle, align 4 +// CHECK: call void @hlsl::Buffer<double>::Buffer(hlsl::Buffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]]) // Buf3 initialization part 1 - local variable declared in function foo() is initialized by RWBuffer<int> C1 default constructor // CHECK: define void @foo() // CHECK-NEXT: entry: -// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.1", align 4 +// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.0", align 4 // CHECK-NEXT: call void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %Buf3) // Buf3 initialization part 2 - body of RWBuffer<int> default C1 constructor that calls the default C2 constructor @@ -76,11 +76,11 @@ export void foo() { // Buf3 initialization part 3 - body of RWBuffer<int> default C2 constructor that initializes handle to poison // CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.1", ptr %{{.*}}, i32 0, i32 0 +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %{{.*}}, i32 0, i32 0 // CHECK-NEXT: store target("dx.TypedBuffer", i32, 1, 0, 1) poison, ptr %__handle, align 4 // Module initialization -// CHECK: define internal void @_GLOBAL__sub_I_RWBuffer_constructor.hlsl() +// CHECK: define internal void @_GLOBAL__sub_I_TypedBuffers_constructor.hlsl() // CHECK-NEXT: entry: // CHECK-NEXT: call void @__cxx_global_var_init() // CHECK-NEXT: call void @__cxx_global_var_init.1() diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl new file mode 100644 index 0000000..d3dba8a --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=0 -check-prefixes=DXIL + +// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=1 -check-prefixes=SPV-RO + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer -DRW=1 -check-prefixes=DXIL + +// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer 
-DRW=2 -check-prefixes=SPV-RW + +// DXIL: %"class.hlsl::[[RESOURCE]]" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.TypedBuffer", half, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.TypedBuffer", float, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.TypedBuffer", double, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.TypedBuffer", <4 x i16>, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.TypedBuffer", <3 x i32>, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.TypedBuffer", <2 x half>, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.TypedBuffer", <3 x float>, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.TypedBuffer", <4 x i32>, [[RW]], 0, 1) } + +// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) } + +// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } +// SPV-RW: 
%"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) } + +RESOURCE<int16_t> BufI16; +RESOURCE<uint16_t> BufU16; +RESOURCE<int> BufI32; +RESOURCE<uint> BufU32; +RESOURCE<int64_t> BufI64; +RESOURCE<uint64_t> BufU64; +RESOURCE<half> BufF16; +RESOURCE<float> BufF32; +RESOURCE<double> BufF64; +RESOURCE< vector<int16_t, 4> > BufI16x4; +RESOURCE< vector<uint, 3> > BufU32x3; +RESOURCE<half2> BufF16x2; +RESOURCE<float3> BufF32x3; +RESOURCE<int4> BufI32x4; +// TODO: RESOURCE<snorm half> BufSNormF16; -> 11 +// TODO: RESOURCE<unorm half> BufUNormF16; -> 12 +// TODO: RESOURCE<snorm float> BufSNormF32; -> 13 +// TODO: RESOURCE<unorm float> BufUNormF32; -> 14 +// TODO: RESOURCE<snorm double> BufSNormF64; -> 15 +// TODO: RESOURCE<unorm double> BufUNormF64; -> 16 + +[numthreads(1,1,1)] +void main(int GI : SV_GroupIndex) { + int16_t v1 = BufI16[GI]; + uint16_t v2 = BufU16[GI]; + int v3 = BufI32[GI]; + uint v4 = BufU32[GI]; + int64_t v5 = BufI64[GI]; + uint64_t v6 = BufU64[GI]; + half v7 = BufF16[GI]; + float v8 = BufF32[GI]; + double v9 = BufF64[GI]; + vector<int16_t,4> v10 = BufI16x4[GI]; + vector<int, 3> v11 = BufU32x3[GI]; + half2 v12 = BufF16x2[GI]; + float3 v13 = BufF32x3[GI]; +} diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl new file mode 100644 index 0000000..b153bda --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL +// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPIRV + +// NOTE: SPIRV codegen for resource methods is not yet implemented + +Buffer<float> Buf : register(t0); +RWBuffer<uint4> RWBuf : register(u0); + +// DXIL: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", float, 0, 0, 0) } +// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) } + +// DXIL: @Buf = internal global %"class.hlsl::Buffer" poison +// DXIL: @RWBuf = internal global %"class.hlsl::RWBuffer" poison + +export float TestLoad() { + return Buf.Load(1) + RWBuf.Load(2).y; +} + +// CHECK: define noundef nofpclass(nan inf) float @TestLoad()() +// CHECK: call {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} @Buf, i32 noundef 1) +// CHECK: call {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} @RWBuf, i32 noundef 2) +// CHECK: add +// CHECK: ret float + +// CHECK: define {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index) +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %{{.*}}, i32 0, i32 0 +// DXIL-NEXT: 
%[[HANDLE:.*]] = load target("dx.TypedBuffer", float, 0, 0, 0), ptr %__handle +// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr +// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0) %[[HANDLE]], i32 %[[INDEX]]) +// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]] +// CHECK-NEXT: ret float %[[VAL]] + +// CHECK: define {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index) +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0 +// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), ptr %__handle +// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr +// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %[[HANDLE]], i32 %[[INDEX]]) +// CHECK-NEXT: %[[VEC:.*]] = load <4 x i32>, ptr %[[PTR]] +// CHECK-NEXT: ret <4 x i32> %[[VEC]] + +// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0), i32) +// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), i32) diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl new file mode 100644 index 0000000..adc35f6 --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXIL,CHECK +// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK + +Buffer<int> In; +RWBuffer<int> Out; + +[numthreads(1,1,1)] +void main(unsigned GI : SV_GroupIndex) { + // CHECK: define void @main() + + // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_0_0_1t(target("dx.TypedBuffer", i32, 0, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_1_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] + // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] + Out[GI] = In[GI]; + + // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] + // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) 
@llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] + Out[GI + 1] = Out[GI]; +} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index ecddf13..44089c4 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2525,10 +2525,10 @@ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_KL_M32 // RUN: %clang -march=pantherlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_NKL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_CWF_M32,CHECK_NKL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_CWF_M32,CHECK_NKL_M32 // CHECK_ARL_M32: #define __ADX__ 1 // CHECK_ARL_M32: #define __AES__ 1 // CHECK_ARL_M32: #define __AVX2__ 1 @@ -2568,7 +2568,7 @@ // CHECK_ARL_M32: #define __POPCNT__ 1 // CHECK_ARL_M32-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M32-NOT: #define __PREFETCHI__ 1 -// CHECK_PTL_M32: #define __PREFETCHI__ 1 +// CHECK_CWF_M32: #define __PREFETCHI__ 1 // CHECK_ARL_M32: #define __PRFCHW__ 1 // CHECK_ARL_M32: #define __PTWRITE__ 1 // CHECK_ARL_M32-NOT: #define __RAOINT__ 1 @@ -2595,7 +2595,6 @@ // CHECK_ARL_M32: #define __UINTR__ 1 // CHECK_ARL_M32-NOT: #define __USERMSR__ 1 // CHECK_ARLS_M32-NOT: #define __USERMSR__ 1 -// CHECK_PTL_M32-NOT: #define __USERMSR__ 1 // CHECK_CWF_M32: #define __USERMSR__ 1 // CHECK_ARL_M32: #define __VAES__ 1 // CHECK_ARL_M32: #define __VPCLMULQDQ__ 1 @@ -2630,10 +2629,10 @@ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_KL_M64 // RUN: %clang -march=pantherlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_NKL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_CWF_M64,CHECK_NKL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_CWF_M64,CHECK_NKL_M64 // CHECK_ARL_M64: #define __ADX__ 1 // CHECK_ARL_M64: #define __AES__ 1 // CHECK_ARL_M64: #define __AVX2__ 1 @@ -2673,7 +2672,7 @@ // CHECK_ARL_M64: #define __POPCNT__ 1 // CHECK_ARL_M64-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M64-NOT: #define __PREFETCHI__ 1 -// CHECK_PTL_M64: #define __PREFETCHI__ 1 +// CHECK_CWF_M64: #define __PREFETCHI__ 1 // CHECK_ARL_M64: #define __PRFCHW__ 1 // CHECK_ARL_M64: #define __PTWRITE__ 1 // CHECK_ARL_M64-NOT: #define __RAOINT__ 1 @@ -2701,7 +2700,6 @@ // CHECK_ARL_M64: #define __UINTR__ 1 // CHECK_ARL_M64-NOT: #define __USERMSR__ 1 // CHECK_ARLS_M64-NOT: #define __USERMSR__ 1 -// CHECK_PTL_M64-NOT: #define __USERMSR__ 1 // CHECK_CWF_M64: #define 
__USERMSR__ 1 // CHECK_ARL_M64: #define __VAES__ 1 // CHECK_ARL_M64: #define __VPCLMULQDQ__ 1 diff --git a/clang/test/Preprocessor/systemz_asm_flag_output.c b/clang/test/Preprocessor/systemz_asm_flag_output.c new file mode 100644 index 0000000..b627499 --- /dev/null +++ b/clang/test/Preprocessor/systemz_asm_flag_output.c @@ -0,0 +1,4 @@ +// RUN: %clang -target systemz-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s +// RUN: %clang -target s390x-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s + +// CHECK: #define __GCC_ASM_FLAG_OUTPUTS__ 1 diff --git a/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp b/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp index 6cf0e02..331fe83 100644 --- a/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp +++ b/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp @@ -626,3 +626,20 @@ void fn() { } } + + +namespace GH109096 { +consteval void undefined(); +template <typename T> +struct scope_exit { + T t; + constexpr ~scope_exit() { t(); } + // expected-error@-1 {{call to immediate function 'GH109096::(anonymous class)::operator()' is not a constant expression}} \ + // expected-note@-1 {{implicit use of 'this' pointer is only allowed within the evaluation}} +}; + +scope_exit guard( // expected-note {{in instantiation of member function}} + []() { undefined(); } +); + +} diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index 3fbe7c0..aaa20f6 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -1416,6 +1416,31 @@ concept IsEntitySpec = } +namespace case8 { + +template <class T> +struct type_identity { + using type = T; +}; + +template <typename Inner> +struct Cat {}; + +template <typename T> +concept CatConcept = requires { + []<class Inner>(type_identity<Cat<Inner>>) {}(type_identity<T>{}); +}; + +template <typename Dummy> +struct Feeder { + template <CatConcept Dummy2> + void feed() noexcept {} +}; + +void main() { Feeder<int>{}.feed<Cat<int>>(); } + +} + } namespace GH162125 { diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 0f1b1d8..de09086 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/VirtualFileSystem.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include <algorithm> #include <memory> #include <optional> #include <vector> @@ -296,35 +295,23 @@ TEST_F(SuppressionMappingTest, EmitCategoryIsExcluded) { } TEST_F(SuppressionMappingTest, LongestMatchWins) { - StringRef Lines[] = { - "[unused]", - "src:*clang/*", - "src:*clang/lib/Sema/*", - "src:*clang/lib/Sema/*=emit", - "src:*clang/lib/Sema/foo*", - }; - llvm::MutableArrayRef<StringRef> Rules = Lines; - Rules = Rules.drop_front(); - llvm::sort(Rules); - - do { - Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt"; - std::string Contents = join(std::begin(Lines), std::end(Lines), "\n"); - FS->addFile("foo.txt", /*ModificationTime=*/{}, - llvm::MemoryBuffer::getMemBuffer(Contents)); - clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS); - EXPECT_THAT(diags(), IsEmpty()); - - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile("clang/lib/Basic/foo.h"))) - << Contents; - EXPECT_FALSE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile("clang/lib/Sema/bar.h"))) - << Contents; - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, 
locForFile("clang/lib/Sema/foo.h"))) - << Contents; - } while (std::next_permutation(Rules.begin(), Rules.end())); + llvm::StringLiteral SuppressionMappingFile = R"( + [unused] + src:*clang/* + src:*clang/lib/Sema/*=emit + src:*clang/lib/Sema/foo*)"; + Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt"; + FS->addFile("foo.txt", /*ModificationTime=*/{}, + llvm::MemoryBuffer::getMemBuffer(SuppressionMappingFile)); + clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS); + EXPECT_THAT(diags(), IsEmpty()); + + EXPECT_TRUE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile("clang/lib/Basic/foo.h"))); + EXPECT_FALSE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile("clang/lib/Sema/bar.h"))); + EXPECT_TRUE(Diags.isSuppressedViaMapping(diag::warn_unused_function, + locForFile("clang/lib/Sema/foo.h"))); } TEST_F(SuppressionMappingTest, LongShortMatch) { diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 9f8e833..5931b60 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -83,6 +83,8 @@ mark_as_advanced(COMPILER_RT_BUILD_ORC) option(COMPILER_RT_BUILD_GWP_ASAN "Build GWP-ASan, and link it into SCUDO" ON) mark_as_advanced(COMPILER_RT_BUILD_GWP_ASAN) option(COMPILER_RT_ENABLE_CET "Build Compiler RT with CET enabled" OFF) +option(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME "Build asan unit tests without depending upon a just-built asan runtime" OFF) +mark_as_advanced(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME) option(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH "Set custom sysroot for building SCUDO standalone" OFF) mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH) diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index 9cd9c97..6d88c96 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -170,11 +170,21 @@ function(add_asan_tests arch test_runtime) set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config) set(CONFIG_NAME_DYNAMIC ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) + # On some platforms, unit tests can be run against the runtime that shipped + # with the host compiler with COMPILER_RT_TEST_STANDALONE_BUILD_LIBS=OFF. + # COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME=ON removes the dependency + # on `asan`, allowing the tests to be run independently without + # a newly built asan runtime. + set(ASAN_UNIT_TEST_DEPS asan) + if(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME) + set(ASAN_UNIT_TEST_DEPS) + endif() + # Closure to keep the values. 
function(generate_asan_tests test_objects test_suite testname) generate_compiler_rt_tests(${test_objects} ${test_suite} ${testname} ${arch} COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_IGNORELIST_FILE} - DEPS asan + DEPS ${ASAN_UNIT_TEST_DEPS} KIND ${TEST_KIND} ${ARGN} ) @@ -215,7 +225,7 @@ function(add_asan_tests arch test_runtime) add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}" SUBDIR "${CONFIG_NAME_DYNAMIC}" OBJECTS ${ASAN_INST_TEST_OBJECTS} - DEPS asan ${ASAN_INST_TEST_OBJECTS} + DEPS ${ASAN_UNIT_TEST_DEPS} ${ASAN_INST_TEST_OBJECTS} LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS} ${DYNAMIC_LINK_FLAGS} ) endif() diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index 7fb58be..edb2699 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack); const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1; const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1; const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1; +const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1; #define GET_MALLOC_STACK_TRACE \ UNINITIALIZED BufferedStackTrace stack; \ diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp index 64df863..80608aa 100644 --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -217,25 +217,52 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment, } auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated)); meta->requested_size = size; + uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated); + void *padding_start = reinterpret_cast<char *>(allocated) + size; + uptr padding_size = actually_allocated_size - size; + + // - With calloc(7, 1), we can set the ideal tagging: + // bytes 0-6: initialized, origin not set (and irrelevant) + // byte 7: uninitialized, origin TAG_ALLOC_PADDING + // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING + // - If we have malloc(7) and __msan_get_track_origins() > 1, the 4-byte + // origin granularity only allows the slightly suboptimal tagging: + // bytes 0-6: uninitialized, origin TAG_ALLOC + // byte 7: uninitialized, origin TAG_ALLOC (suboptimal) + // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING + // - If we have malloc(7) and __msan_get_track_origins() == 1, we use a + // single origin to reduce overhead: + // bytes 0-6: uninitialized, origin TAG_ALLOC + // byte 7: uninitialized, origin TAG_ALLOC (suboptimal) + // bytes 8-15: uninitialized, origin TAG_ALLOC (suboptimal) + if (__msan_get_track_origins() && flags()->poison_in_malloc && + (zero || (__msan_get_track_origins() > 1))) { + stack->tag = STACK_TRACE_TAG_ALLOC_PADDING; + Origin o2 = Origin::CreateHeapOrigin(stack); + __msan_set_origin(padding_start, padding_size, o2.raw_id()); + } + if (zero) { if (allocator.FromPrimary(allocated)) __msan_clear_and_unpoison(allocated, size); else __msan_unpoison(allocated, size); // Mem is already zeroed.
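+    // Keep the padding past the requested size poisoned even for
+    // zero-initialized memory. E.g., assuming the 16-byte chunk used in the
+    // tests, after `char *p = (char *)calloc(7, 1);` a read of p[6] is
+    // clean while a read of p[7] is reported with the ALLOC_PADDING origin.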
+ + if (flags()->poison_in_malloc) + __msan_poison(padding_start, padding_size); } else if (flags()->poison_in_malloc) { - __msan_poison(allocated, size); + __msan_poison(allocated, actually_allocated_size); + if (__msan_get_track_origins()) { stack->tag = StackTrace::TAG_ALLOC; Origin o = Origin::CreateHeapOrigin(stack); - __msan_set_origin(allocated, size, o.raw_id()); + __msan_set_origin( + allocated, + __msan_get_track_origins() == 1 ? actually_allocated_size : size, + o.raw_id()); } } - uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated); - // For compatibility, the allocator converted 0-sized allocations into 1 byte - if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc) - __msan_poison(allocated, 1); - UnpoisonParam(2); RunMallocHooks(allocated, size); return allocated; @@ -255,9 +282,10 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) { if (flags()->poison_in_free && allocator.FromPrimary(p)) { __msan_poison(p, size); if (__msan_get_track_origins()) { + uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(p); stack->tag = StackTrace::TAG_DEALLOC; Origin o = Origin::CreateHeapOrigin(stack); - __msan_set_origin(p, size, o.raw_id()); + __msan_set_origin(p, actually_allocated_size, o.raw_id()); } } if (MsanThread *t = GetCurrentThread()) { diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp index 99bf81f..cd0bf67 100644 --- a/compiler-rt/lib/msan/msan_report.cpp +++ b/compiler-rt/lib/msan/msan_report.cpp @@ -90,6 +90,10 @@ static void DescribeOrigin(u32 id) { Printf(" %sVirtual table ptr was destroyed%s\n", d.Origin(), d.Default()); break; + case STACK_TRACE_TAG_ALLOC_PADDING: + Printf(" %sUninitialized value is outside of heap allocation%s\n", + d.Origin(), d.Default()); + break; default: Printf(" %sUninitialized value was created%s\n", d.Origin(), d.Default()); diff --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp new file mode 100644 index 0000000..72acf31 --- /dev/null +++ b/compiler-rt/test/msan/allocator_padding.cpp @@ -0,0 +1,94 @@ +// *** malloc: all bytes are uninitialized +// * malloc byte 0 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 0 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// +// * malloc byte 6 +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 6 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// +// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes +// 7-15 are padding. +// +// * malloc byte 7 +// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING, +// ALLOC always takes precedence. +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// +// Bytes 8-15 are padding +// For track-origins=1, ALLOC is used instead of ALLOC_PADDING. 
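+// (With -fsanitize-memory-track-origins=1, MsanAllocate applies one TAG_ALLOC
+// origin to the whole chunk to reduce overhead, so padding bytes report the
+// ordinary heap-allocation origin rather than ALLOC_PADDING.)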
+// +// * malloc byte 8 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// +// * malloc byte 15 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING + +// *** calloc +// Bytes 0-6 are fully initialized, so no MSan report should happen. +// +// * calloc byte 0 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1 +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1 +// +// * calloc byte 6 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1 +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1 +// +// * calloc byte 7 +// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING +// (since the origin does not need to track bytes 4-6). +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// +// * calloc byte 8 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// +// * calloc byte 15 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char **argv) { +#ifdef USE_CALLOC + char *p = (char *)calloc(7, 1); +#else + char *p = (char *)malloc(7); +#endif + + if (argc == 2) { + int index = atoi(argv[1]); + + printf("p[%d] = %d\n", index, p[index]); + // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value + // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]] + // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation + // ORIGIN-ALLOC-PADDING: Uninitialized value is outside of heap allocation + free(p); + } + + return 0; +} diff --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp index 1451e1e..f4cf1d8 100644 --- a/compiler-rt/test/msan/zero_alloc.cpp +++ b/compiler-rt/test/msan/zero_alloc.cpp @@ -1,4 +1,9 @@ -// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK +// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory 
-fsanitize-memory-track-origins=1 %s -o %t && not %run %t 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,DISCOUNT +// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGINS #include <stdio.h> #include <stdlib.h> @@ -10,6 +15,8 @@ int main(int argc, char **argv) { printf("Content of p1 is: %d\n", *p1); // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]] + // DISCOUNT: Uninitialized value is outside of heap allocation + // ORIGINS: Uninitialized value is outside of heap allocation free(p1); } @@ -19,6 +26,8 @@ int main(int argc, char **argv) { printf("Content of p2 is: %d\n", *p2); // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]] + // DISCOUNT: Uninitialized value is outside of heap allocation + // ORIGINS: Uninitialized value is outside of heap allocation free(p2); } @@ -28,6 +37,8 @@ int main(int argc, char **argv) { printf("Content of p2 is: %d\n", *p3); // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]] + // DISCOUNT: Uninitialized value was created by a heap allocation + // ORIGINS: Uninitialized value is outside of heap allocation free(p3); } diff --git a/compiler-rt/test/tsan/Darwin/write-interpose.c b/compiler-rt/test/tsan/Darwin/write-interpose.c index cbd9a08..51ff3ee 100644 --- a/compiler-rt/test/tsan/Darwin/write-interpose.c +++ b/compiler-rt/test/tsan/Darwin/write-interpose.c @@ -7,6 +7,8 @@ // Note that running the below command without `lock_during_write` should // deadlock (self-lock) // RUN: env DYLD_INSERT_LIBRARIES=%t.dylib TSAN_OPTIONS=verbosity=2:lock_during_write=disable_for_current_process %run %t 2>&1 | FileCheck %s + +// UNSUPPORTED: ios #include <stdio.h> diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 1085393..408f039 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -57,8 +57,11 @@ struct OpenACCMappableModel mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, - mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, + bool &needsDestroy) const; + + bool generatePrivateDestroy(mlir::Type type, mlir::OpBuilder &builder, + mlir::Location loc, mlir::Value privatized) const; }; } // namespace fir::acc diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 0b31cfe..bbb7e6e 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -200,6 +200,12 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault( fir::RecordType recordType, llvm::StringRef component, mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr); +/// Indicate if a derived type has a final routine. Returns std::nullopt if +/// that information is not in the IR. +std::optional<bool> +isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable = nullptr); + +/// Generate an LLVM constant value of type `ity`, using the provided offset.
mlir::LLVM::ConstantOp genConstantIndex(mlir::Location loc, mlir::Type ity, diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 62e5c0c..cfb1891 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -978,15 +978,40 @@ static RecipeOp genRecipeOp( auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty); assert(mappableTy && "Expected that all variable types are considered mappable"); + bool needsDestroy = false; auto retVal = mappableTy.generatePrivateInit( builder, loc, mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>( initBlock->getArgument(0)), initName, initBlock->getArguments().take_back(initBlock->getArguments().size() - 1), - initValue); + initValue, needsDestroy); mlir::acc::YieldOp::create(builder, loc, retVal ? retVal : initBlock->getArgument(0)); + // Create destroy region and generate destruction if requested. + if (needsDestroy) { + llvm::SmallVector<mlir::Type> destroyArgsTy; + llvm::SmallVector<mlir::Location> destroyArgsLoc; + // original and privatized/reduction value + destroyArgsTy.push_back(ty); + destroyArgsTy.push_back(ty); + destroyArgsLoc.push_back(loc); + destroyArgsLoc.push_back(loc); + // Append bounds arguments (if any) in the same order as init region + if (argsTy.size() > 1) { + destroyArgsTy.append(argsTy.begin() + 1, argsTy.end()); + destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc); + } + + builder.createBlock(&recipe.getDestroyRegion(), + recipe.getDestroyRegion().end(), destroyArgsTy, + destroyArgsLoc); + builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back()); + // Call interface on the privatized/reduction value (2nd argument). + (void)mappableTy.generatePrivateDestroy( + builder, loc, recipe.getDestroyRegion().front().getArgument(1)); + mlir::acc::TerminatorOp::create(builder, loc); + } return recipe; } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 0afb295..70bb43a2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> { llvm::LogicalResult matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { + + if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) { + auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); + replaceWithAddrOfOrASCast( + rewriter, addr->getLoc(), + global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter), + getProgramAddressSpace(rewriter), + global ? 
global.getSymName() + : addr.getSymbol().getRootReference().getValue(), + convertType(addr.getType()), addr); + return mlir::success(); + } + auto global = addr->getParentOfType<mlir::ModuleOp>() .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); replaceWithAddrOfOrASCast( @@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { if (global.getDataAttr() && *global.getDataAttr() == cuf::DataAttribute::Constant) - TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation"); + g.setAddrSpace( + static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant)); rewriter.eraseOp(global); return mlir::success(); diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 89aa010..9bf10b5 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -21,6 +21,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/Support/Utils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/IR/BuiltinOps.h" @@ -352,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) { // calculation op. mlir::Value baseRef = llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op) + .Case<fir::DeclareOp>([&](auto op) { + // If this declare binds a view with an underlying storage operand, + // treat that storage as the base reference. Otherwise, fall back + // to the declared memref. + if (auto storage = op.getStorage()) + return storage; + return mlir::Value(varPtr); + }) .Case<hlfir::DesignateOp>([&](auto op) { // Get the base object. return op.getMemref(); @@ -548,14 +557,27 @@ template <typename Ty> mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const { + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const { + needsDestroy = false; mlir::Value retVal; mlir::Type unwrappedTy = fir::unwrapRefType(type); mlir::ModuleOp mod = builder.getInsertionBlock() ->getParent() ->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder firBuilder(builder, mod); + if (auto recType = llvm::dyn_cast<fir::RecordType>( + fir::getFortranElementType(unwrappedTy))) { + // Need to make deep copies of allocatable components. + if (fir::isRecordWithAllocatableMember(recType)) + TODO(loc, + "OpenACC: privatizing derived type with allocatable components"); + // Need to decide if user assignment/final routine should be called. 
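+    // isRecordWithFinalRoutine returns std::nullopt when no fir.type_info is
+    // in the IR; value_or(false) then assumes the type has no final routine
+    // and privatization proceeds.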
+ if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false)) + TODO(loc, "OpenACC: privatizing derived type with user assignment or " + "final routine "); + } + + fir::FirOpBuilder firBuilder(builder, mod); auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp { auto alloca = fir::AllocaOp::create(firBuilder, loc, ty); return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName); @@ -615,9 +637,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Value firClass = fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar); fir::StoreOp::create(builder, loc, firClass, retVal); + needsDestroy = true; } else if (mlir::isa<fir::SequenceType>(innerTy)) { hlfir::Entity source = hlfir::Entity{var}; - auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source); + auto [temp, cleanupFlag] = + hlfir::createTempFromMold(loc, firBuilder, source); if (fir::isa_ref_type(type)) { // When the temp is created - it is not a reference - thus we can // end up with a type inconsistency. Therefore ensure storage is created @@ -636,6 +660,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( } else { retVal = temp; } + // If heap was allocated, a destroy is required later. + if (cleanupFlag) + needsDestroy = true; } else { TODO(loc, "Unsupported boxed type for OpenACC private-like recipe"); } @@ -667,23 +694,61 @@ template mlir::Value OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::PointerType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; + +template <typename Ty> +bool OpenACCMappableModel<Ty>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const { + mlir::Type unwrappedTy = fir::unwrapRefType(type); + // For boxed scalars allocated with AllocMem during init, free the heap. + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) { + mlir::Value boxVal = privatized; + if (fir::isa_ref_type(boxVal.getType())) + boxVal = fir::LoadOp::create(builder, loc, boxVal); + mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal); + // FreeMem only accepts fir.heap and this may not be represented in the box + // type if the privatized entity is not an allocatable. 
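+    // For example, the private copy of a !fir.box<!fir.array<?xi32>> gives a
+    // !fir.ref<!fir.array<?xi32>> box address that is converted to
+    // !fir.heap<!fir.array<?xi32>> before being passed to fir.freemem.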
+ mlir::Type heapType = + fir::HeapType::get(fir::unwrapRefType(addr.getType())); + if (heapType != addr.getType()) + addr = fir::ConvertOp::create(builder, loc, heapType, addr); + fir::FreeMemOp::create(builder, loc, addr); + return true; + } + + // Nothing to do for other categories by default, they are stack allocated. + return true; +} + +template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; } // namespace fir::acc diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp index c71642c..92390e4a 100644 --- a/flang/lib/Optimizer/Support/Utils.cpp +++ b/flang/lib/Optimizer/Support/Utils.cpp @@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault( return std::nullopt; } +std::optional<bool> +fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable) { + fir::TypeInfoOp typeInfo = + fir::lookupTypeInfoOp(recordType, module, symbolTable); + if (!typeInfo) + return std::nullopt; + return !typeInfo.getNoFinal(); +} + mlir::LLVM::ConstantOp fir::genConstantIndex(mlir::Location loc, mlir::Type ity, mlir::ConversionPatternRewriter &rewriter, diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir index bbd3f9f..60cda9e 100644 --- a/flang/test/Fir/CUDA/cuda-code-gen.mlir +++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir @@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e // CHECK-LABEL: llvm.func @_QQxxx() // CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor + +// ----- + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 { + %0 = fir.zero_bits i32 + fir.has_value %0 : i32 + } + gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel { + %c-1 = arith.constant 
-1 : index + %c1_i32 = arith.constant 1 : i32 + %0 = arith.constant 1 : i32 + %1 = arith.addi %0, %c1_i32 : i32 + %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32> + %4 = fir.load %2 : !fir.ref<i32> + %5 = fir.convert %1 : (i32) -> i64 + %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32> + fir.store %4 to %6 : !fir.ref<i32> + gpu.return + } + } +} + +// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32 +// CHECK-LABEL: gpu.func @_QMkernelsPassign +// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4> +// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir new file mode 100644 index 0000000..fabfe4c --- /dev/null +++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir @@ -0,0 +1,24 @@ +// Use --mlir-disable-threading so that the diagnostic printing is serialized. +// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s + +module { + // Build a scalar view via fir.declare with a storage operand into an array of i8 + func.func @_QPdeclare_with_storage_is_nonscalar() { + %c0 = arith.constant 0 : index + %arr = fir.alloca !fir.array<4xi8> + %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8> + %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32> + %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"} + : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32> + // Force interface query through an acc op that prints type category + %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + acc.enter_data dataOperands(%cp : !fir.ref<f32>) + return + } + + // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + // CHECK: Pointer-like and Mappable: !fir.ref<f32> + // CHECK: Type category: array +} + + diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 index 429f207..3987f9f 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 @@ -4,6 +4,11 @@ ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefix=FIR-CHECK +! TODO: This test hits a fatal TODO. Deal with allocatable component +! destructions. For arrays, allocatable component allocation may also be +! missing. +! XFAIL: * + module m_firstprivate_derived_alloc_comp type point real, allocatable :: x(:) diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index d37eb8d..485825d 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -26,6 +26,12 @@ ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x2xi32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box<!fir.array<?x?x2xi32>>, !fir.box<!fir.array<?x?x2xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! 
CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -47,6 +53,12 @@ ! CHECK: %[[RIGHT:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[LEFT]] to %[[RIGHT]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -64,6 +76,12 @@ ! CHECK: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[DES_V1]] to %[[DES_V2]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_UxUx2xi32 : !fir.box<!fir.array<?x?x2xi32>> init { @@ -74,6 +92,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?x?x2xi32>, %[[DIM0]]#1, %[[DIM1]]#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.heap<!fir.array<?x?x2xi32>>) ! CHECK: acc.yield %[[DECL]]#0 : !fir.box<!fir.array<?x?x2xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> init { @@ -89,6 +113,13 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! 
CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: @privatization_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> init { @@ -99,6 +130,12 @@ ! CHECK: %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>> ! CHECK: fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> ! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<i32> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init { @@ -114,6 +151,12 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -124,6 +167,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>) ! CHECK: acc.yield %[[DECLARE:.*]]#0 : !fir.box<!fir.array<?xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb50.ub99_ref_50xf32 : !fir.ref<!fir.array<50xf32>> init { @@ -140,6 +189,7 @@ ! 
CHECK: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref<!fir.array<50xf32>>, !fir.ref<!fir.array<50xf32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_100xf32 : !fir.ref<!fir.array<100xf32>> init { diff --git a/libcxx/include/__cxx03/__atomic/atomic.h b/libcxx/include/__cxx03/__atomic/atomic.h index bc4a393..f2e8493 100644 --- a/libcxx/include/__cxx03/__atomic/atomic.h +++ b/libcxx/include/__cxx03/__atomic/atomic.h @@ -34,9 +34,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> struct atomic : public __atomic_base<_Tp> { - using __base = __atomic_base<_Tp>; - using value_type = _Tp; - using difference_type = value_type; + using __base = __atomic_base<_Tp>; _LIBCPP_HIDE_FROM_ABI atomic() _NOEXCEPT = default; @@ -59,8 +57,8 @@ struct atomic : public __atomic_base<_Tp> { template <class _Tp> struct atomic<_Tp*> : public __atomic_base<_Tp*> { - using __base = __atomic_base<_Tp*>; - using value_type = _Tp*; + using __base = __atomic_base<_Tp*>; + using difference_type = ptrdiff_t; _LIBCPP_HIDE_FROM_ABI atomic() _NOEXCEPT = default; diff --git a/libcxx/include/__cxx03/__atomic/atomic_base.h b/libcxx/include/__cxx03/__atomic/atomic_base.h index a2b40c6..d79ef7d 100644 --- a/libcxx/include/__cxx03/__atomic/atomic_base.h +++ b/libcxx/include/__cxx03/__atomic/atomic_base.h @@ -32,6 +32,8 @@ struct __atomic_base // false { mutable __cxx_atomic_impl<_Tp> __a_; + using value_type = _Tp; + _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT { return __cxx_atomic_is_lock_free(sizeof(__cxx_atomic_impl<_Tp>)); } @@ -127,6 +129,8 @@ template <class _Tp> struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> { using __base = __atomic_base<_Tp, false>; + using difference_type = typename __base::value_type; + _LIBCPP_HIDE_FROM_ABI __atomic_base() _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {} diff --git a/libcxx/include/__cxx03/regex b/libcxx/include/__cxx03/regex index b96d59d..b6a78f2 100644 --- a/libcxx/include/__cxx03/regex +++ b/libcxx/include/__cxx03/regex @@ -2100,7 +2100,7 @@ public: __ranges_.push_back( std::make_pair(__traits_.transform(__b.begin(), __b.end()), __traits_.transform(__e.begin(), __e.end()))); } else { - if (__b.size() != 1 || __e.size() != 1) + if (__b.size() != 1 || __e.size() != 1 || char_traits<typename string_type::value_type>::lt(__e[0], __b[0])) __throw_regex_error<regex_constants::error_range>(); if (__icase_) { __b[0] = __traits_.translate_nocase(__b[0]); @@ -3911,7 +3911,7 @@ _ForwardIterator basic_regex<_CharT, _Traits>::__parse_character_escape( ++__first; break; default: - if (*__first != '_' && !__traits_.isctype(*__first, ctype_base::alnum)) { + if (!__traits_.isctype(*__first, ctype_base::alnum)) { if (__str) *__str = *__first; else diff --git a/libcxx/include/__cxx03/sstream b/libcxx/include/__cxx03/sstream index 44c2423..741158a 100644 --- a/libcxx/include/__cxx03/sstream +++ b/libcxx/include/__cxx03/sstream @@ -354,9 +354,15 @@ private: public: // [stringbuf.cons] constructors: - _LIBCPP_HIDE_FROM_ABI basic_stringbuf() : __hm_(nullptr), __mode_(ios_base::in | ios_base::out) {} + _LIBCPP_HIDE_FROM_ABI basic_stringbuf() : 
__hm_(nullptr), __mode_(ios_base::in | ios_base::out) { + // it is implementation-defined whether we initialize eback() & friends to nullptr, and libc++ doesn't + __init_buf_ptrs(); + } - _LIBCPP_HIDE_FROM_ABI explicit basic_stringbuf(ios_base::openmode __wch) : __hm_(nullptr), __mode_(__wch) {} + _LIBCPP_HIDE_FROM_ABI explicit basic_stringbuf(ios_base::openmode __wch) : __hm_(nullptr), __mode_(__wch) { + // it is implementation-defined whether we initialize eback() & friends to nullptr, and libc++ doesn't + __init_buf_ptrs(); + } _LIBCPP_HIDE_FROM_ABI explicit basic_stringbuf(const string_type& __s, ios_base::openmode __wch = ios_base::in | ios_base::out) diff --git a/libcxx/include/__cxx03/vector b/libcxx/include/__cxx03/vector index 4b62e0b..dbaa33c 100644 --- a/libcxx/include/__cxx03/vector +++ b/libcxx/include/__cxx03/vector @@ -432,10 +432,12 @@ public: template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI vector(size_type __n, const value_type& __x, const allocator_type& __a) : __end_cap_(nullptr, __a) { + auto __guard = std::__make_exception_guard(__destroy_vector(*this)); if (__n > 0) { __vallocate(__n); __construct_at_end(__n, __x); } + __guard.__complete(); } template <class _InputIterator, @@ -1054,9 +1056,7 @@ inline _LIBCPP_HIDE_FROM_ABI vector<_Tp, _Allocator>::vector(vector&& __x, const __x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr; } else { typedef move_iterator<iterator> _Ip; - auto __guard = std::__make_exception_guard(__destroy_vector(*this)); - assign(_Ip(__x.begin()), _Ip(__x.end())); - __guard.__complete(); + __init_with_size(_Ip(__x.begin()), _Ip(__x.end()), __x.size()); } } diff --git a/libcxx/test/benchmarks/bitset.bench.cpp b/libcxx/test/benchmarks/bitset.bench.cpp index 8fcf52e..b4c7e6f 100644 --- a/libcxx/test/benchmarks/bitset.bench.cpp +++ b/libcxx/test/benchmarks/bitset.bench.cpp @@ -103,7 +103,7 @@ BENCHMARK(BM_BitsetToString<262144>)->Arg(50)->Name("BM_BitsetToString<262144>/U BENCHMARK(BM_BitsetToString<524288>)->Arg(50)->Name("BM_BitsetToString<524288>/Uniform (50%)"); BENCHMARK(BM_BitsetToString<1048576>)->Arg(50)->Name("BM_BitsetToString<1048576>/Uniform (50%)"); // 1 << 20 -static void BM_ctor_ull(benchmark::State& state) { +static void BM_Bitset_ctor_ull(benchmark::State& state) { unsigned long long val = (1ULL << state.range(0)) - 1; for (auto _ : state) { std::bitset<128> b(val); @@ -111,6 +111,6 @@ static void BM_ctor_ull(benchmark::State& state) { } } -BENCHMARK(BM_ctor_ull)->DenseRange(1, 63); +BENCHMARK(BM_Bitset_ctor_ull)->DenseRange(1, 63); BENCHMARK_MAIN(); diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp index 320ef57..1bd5792 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -48,12 +46,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // 
expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add(&fun, 0); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add(&fun, 0); } } diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp index bdd8089..bdd4a83 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -51,12 +49,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add_explicit(&fun, 0, std::memory_order_relaxed); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_add_explicit(&fun, 0, std::memory_order_relaxed); } } diff --git a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp index 2c9f898..105a010 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -48,12 +46,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub(&fun, 0); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub(&fun, 0); } } diff --git 
a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp index 88c4275..1647ed3 100644 --- a/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp +++ b/libcxx/test/libcxx-03/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <atomic> // template <class T> @@ -51,12 +49,12 @@ void pointer_to_incomplete_type() { void function_pointer() { { volatile std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub_explicit(&fun, 0, std::memory_order_relaxed); } { std::atomic<void (*)(int)> fun; - // expected-error-re@*:* {{static assertion failed due to requirement '!is_function<void (int)>::value'{{.*}}Pointer to function isn't allowed}} + // expected-error-re@*:* {{static assertion failed due to requirement {{.+}}Pointer to function isn't allowed}} std::atomic_fetch_sub_explicit(&fun, 0, std::memory_order_relaxed); } } diff --git a/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp b/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp index 2b6c380..d6caa33 100644 --- a/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp +++ b/libcxx/test/libcxx-03/input.output/string.streams/stringbuf/const_sso_buffer.pass.cpp @@ -8,8 +8,6 @@ // <sstream> -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // How the constructors of basic_stringbuf initialize the buffer pointers is // not specified. For some constructors it's implementation defined whether the // pointers are set to nullptr. 
Libc++'s implementation directly uses the SSO diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp index fcbf649..283adbc 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp @@ -15,8 +15,6 @@ // UNSUPPORTED: no-wide-characters -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <fstream> std::basic_filebuf<char, std::char_traits<wchar_t> > f; diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp index 8eca76c..ba6f3c3 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp @@ -15,8 +15,6 @@ // UNSUPPORTED: no-wide-characters -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <fstream> std::basic_fstream<char, std::char_traits<wchar_t> > f; diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp index c979392..b1edec5 100644 --- a/libcxx/test/std/atomics/types.pass.cpp +++ b/libcxx/test/std/atomics/types.pass.cpp @@ -17,9 +17,6 @@ // typedef T value_type; // }; -// atomic still has a difference_type in the C++03 frozen headers -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <atomic> #include <chrono> #include <cstdint> diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp index 00de053..679eec2 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp @@ -11,8 +11,6 @@ // (bug report: https://llvm.org/PR58392) // Check that vector constructors don't leak memory when an operation inside the constructor throws an exception -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <cstddef> #include <memory> #include <type_traits> diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp index dac4396..d131f5c 100644 --- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp @@ -15,8 +15,6 @@ // basic_stringbuf() : basic_stringbuf(ios_base::in | ios_base::out) {} // C++20 // explicit basic_stringbuf(ios_base::openmode which); // C++20 -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <sstream> #include <cassert> diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp index 799a362..0184a38 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // <regex> // template <class BidirectionalIterator, class Allocator, class charT, class traits> diff --git a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp index ecfdaee..cabd9eb 
100644 --- a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp +++ b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp @@ -14,8 +14,6 @@ // template <class ST, class SA> // basic_regex(const basic_string<charT, ST, SA>& s); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include <regex> #include <cassert> #include "test_macros.h" diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml index cac97a9..9367a8f 100644 --- a/libcxx/utils/ci/docker-compose.yml +++ b/libcxx/utils/ci/docker-compose.yml @@ -23,7 +23,7 @@ services: dockerfile: Dockerfile target: actions-builder args: - GITHUB_RUNNER_VERSION: "2.328.0" + GITHUB_RUNNER_VERSION: "2.329.0" <<: [*image_versions, *compiler_versions] android-buildkite-builder: diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake index c6f00ed..23d9d49 100644 --- a/lldb/cmake/modules/LLDBFramework.cmake +++ b/lldb/cmake/modules/LLDBFramework.cmake @@ -68,8 +68,6 @@ if(NOT APPLE_EMBEDDED) ) endif() -find_program(unifdef_EXECUTABLE unifdef) - # Wrap output in a target, so lldb-framework can depend on it. add_custom_target(liblldb-resource-headers DEPENDS lldb-sbapi-dwarf-enums ${lldb_staged_headers}) set_target_properties(liblldb-resource-headers PROPERTIES FOLDER "LLDB/Resources") diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index fec9fde..1a7db8f 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -130,6 +130,8 @@ FLAGS_ENUM(LaunchFlags){ eLaunchFlagInheritTCCFromParent = (1u << 12), ///< Don't make the inferior responsible for its own TCC ///< permissions but instead inherit them from its parent. + eLaunchFlagMemoryTagging = + (1u << 13), ///< Launch process with memory tagging explicitly enabled. }; /// Thread Run Modes. diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index 53e991a..1a2860a 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -1,3 +1,4 @@ +from abc import ABC, abstractmethod import ctypes import errno import io @@ -5,6 +6,7 @@ import threading import socket import traceback from lldbsuite.support import seven +from typing import Optional, List, Tuple def checksum(message): @@ -86,7 +88,7 @@ class MockGDBServerResponder: handles any packet not recognized in the common packet handling code. 
""" - registerCount = 40 + registerCount: int = 40 class RESPONSE_DISCONNECT: pass @@ -95,7 +97,7 @@ class MockGDBServerResponder: pass def __init__(self): - self.packetLog = [] + self.packetLog: List[str] = [] def respond(self, packet): """ @@ -241,7 +243,7 @@ class MockGDBServerResponder: def qHostInfo(self): return "ptrsize:8;endian:little;" - def qEcho(self): + def qEcho(self, num: int): return "E04" def qQueryGDBServer(self): @@ -262,10 +264,10 @@ class MockGDBServerResponder: def D(self, packet): return "OK" - def readRegisters(self): + def readRegisters(self) -> str: return "00000000" * self.registerCount - def readRegister(self, register): + def readRegister(self, register: int) -> str: return "00000000" def writeRegisters(self, registers_hex): @@ -305,7 +307,9 @@ class MockGDBServerResponder: # SIGINT is 2, return type is 2 digit hex string return "S02" - def qXferRead(self, obj, annex, offset, length): + def qXferRead( + self, obj: str, annex: str, offset: int, length: int + ) -> Tuple[Optional[str], bool]: return None, False def _qXferResponse(self, data, has_more): @@ -373,15 +377,17 @@ class MockGDBServerResponder: pass -class ServerChannel: +class ServerChannel(ABC): """ A wrapper class for TCP or pty-based server. """ - def get_connect_address(self): + @abstractmethod + def get_connect_address(self) -> str: """Get address for the client to connect to.""" - def get_connect_url(self): + @abstractmethod + def get_connect_url(self) -> str: """Get URL suitable for process connect command.""" def close_server(self): @@ -393,10 +399,12 @@ class ServerChannel: def close_connection(self): """Close all resources used by the accepted connection.""" - def recv(self): + @abstractmethod + def recv(self) -> bytes: """Receive a data packet from the connected client.""" - def sendall(self, data): + @abstractmethod + def sendall(self, data: bytes) -> None: """Send the data to the connected client.""" @@ -427,11 +435,11 @@ class ServerSocket(ServerChannel): self._connection.close() self._connection = None - def recv(self): + def recv(self) -> bytes: assert self._connection is not None return self._connection.recv(4096) - def sendall(self, data): + def sendall(self, data: bytes) -> None: assert self._connection is not None return self._connection.sendall(data) @@ -443,10 +451,10 @@ class TCPServerSocket(ServerSocket): )[0] super().__init__(family, type, proto, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return "[{}]:{}".format(*self._server_socket.getsockname()) - def get_connect_url(self): + def get_connect_url(self) -> str: return "connect://" + self.get_connect_address() @@ -454,10 +462,10 @@ class UnixServerSocket(ServerSocket): def __init__(self, addr): super().__init__(socket.AF_UNIX, socket.SOCK_STREAM, 0, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return self._server_socket.getsockname() - def get_connect_url(self): + def get_connect_url(self) -> str: return "unix-connect://" + self.get_connect_address() @@ -471,7 +479,7 @@ class PtyServerSocket(ServerChannel): self._primary = io.FileIO(primary, "r+b") self._secondary = io.FileIO(secondary, "r+b") - def get_connect_address(self): + def get_connect_address(self) -> str: libc = ctypes.CDLL(None) libc.ptsname.argtypes = (ctypes.c_int,) libc.ptsname.restype = ctypes.c_char_p @@ -484,7 +492,7 @@ class PtyServerSocket(ServerChannel): self._secondary.close() self._primary.close() - def recv(self): + def recv(self) -> bytes: try: return self._primary.read(4096) except 
OSError as e: @@ -493,8 +501,8 @@ class PtyServerSocket(ServerChannel): return b"" raise - def sendall(self, data): - return self._primary.write(data) + def sendall(self, data: bytes) -> None: + self._primary.write(data) class MockGDBServer: @@ -527,18 +535,21 @@ class MockGDBServer: self._thread.join() self._thread = None - def get_connect_address(self): + def get_connect_address(self) -> str: + assert self._socket is not None return self._socket.get_connect_address() - def get_connect_url(self): + def get_connect_url(self) -> str: + assert self._socket is not None return self._socket.get_connect_url() def run(self): + assert self._socket is not None # For testing purposes, we only need to worry about one client # connecting just one time. try: self._socket.accept() - except: + except Exception: traceback.print_exc() return self._shouldSendAck = True @@ -553,7 +564,7 @@ class MockGDBServer: self._receive(data) except self.TerminateConnectionException: pass - except Exception as e: + except Exception: print( "An exception happened when receiving the response from the gdb server. Closing the client..." ) @@ -586,7 +597,9 @@ class MockGDBServer: Once a complete packet is found at the front of self._receivedData, its data is removed form self._receivedData. """ + assert self._receivedData is not None data = self._receivedData + assert self._receivedDataOffset is not None i = self._receivedDataOffset data_len = len(data) if data_len == 0: @@ -639,10 +652,13 @@ class MockGDBServer: self._receivedDataOffset = 0 return packet - def _sendPacket(self, packet): - self._socket.sendall(seven.bitcast_to_bytes(frame_packet(packet))) + def _sendPacket(self, packet: str): + assert self._socket is not None + framed_packet = seven.bitcast_to_bytes(frame_packet(packet)) + self._socket.sendall(framed_packet) def _handlePacket(self, packet): + assert self._socket is not None if packet is self.PACKET_ACK: # Ignore ACKs from the client. For the future, we can consider # adding validation code to make sure the client only sends ACKs diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 21d94d6..8ae20bd 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -127,6 +127,10 @@ Status CommandOptionsProcessLaunch::SetOptionValue( break; } + case 'M': + launch_info.GetFlags().Set(eLaunchFlagMemoryTagging); + break; + case 'c': if (!option_arg.empty()) launch_info.SetShell(FileSpec(option_arg)); diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 595b3d0..a9f054e 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1173,6 +1173,11 @@ let Command = "process launch" in { Arg<"Boolean">, Desc<"Set whether to shell expand arguments to the process when " "launching.">; + def process_launch_memory_tagging + : Option<"memory-tagging", "M">, + Desc<"Set whether to explicitly enable memory tagging when launching " + "the process. Requires hardware support. 
" + "(Only supported on Darwin.)">; } let Command = "process attach" in { diff --git a/lldb/source/Host/freebsd/Host.cpp b/lldb/source/Host/freebsd/Host.cpp index fa7efad..dfdbfea 100644 --- a/lldb/source/Host/freebsd/Host.cpp +++ b/lldb/source/Host/freebsd/Host.cpp @@ -18,8 +18,6 @@ #include <dlfcn.h> #include <execinfo.h> -#include "llvm/Object/ELF.h" - #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" #include "lldb/Host/HostInfo.h" @@ -32,6 +30,7 @@ #include "lldb/Utility/Status.h" #include "lldb/Utility/StreamString.h" +#include "llvm/Object/ELF.h" #include "llvm/TargetParser/Host.h" namespace lldb_private { diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index 3c1d117..7120892 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -1210,6 +1210,39 @@ static Status LaunchProcessPosixSpawn(const char *exe_path, } } + if (launch_info.GetFlags().Test(eLaunchFlagMemoryTagging)) { + // The following function configures the spawn attributes to launch the + // process with memory tagging explicitly enabled. We look it up + // dynamically since it is only available on newer OS. Does nothing on + // hardware which does not support MTE. + // + // int posix_spawnattr_set_use_sec_transition_shims_np( + // posix_spawnattr_t *attr, uint32_t flags); + // + using posix_spawnattr_set_use_sec_transition_shims_np_t = + int (*)(posix_spawnattr_t *attr, uint32_t flags); + auto posix_spawnattr_set_use_sec_transition_shims_np_fn = + (posix_spawnattr_set_use_sec_transition_shims_np_t)dlsym( + RTLD_DEFAULT, "posix_spawnattr_set_use_sec_transition_shims_np"); + if (posix_spawnattr_set_use_sec_transition_shims_np_fn) { + error = + Status(posix_spawnattr_set_use_sec_transition_shims_np_fn(&attr, 0), + eErrorTypePOSIX); + if (error.Fail()) { + LLDB_LOG(log, + "error: {0}, " + "posix_spawnattr_set_use_sec_transition_shims_np(&attr, 0)", + error); + return error; + } + } else { + LLDB_LOG(log, + "error: posix_spawnattr_set_use_sec_transition_shims_np not " + "available", + error); + } + } + // Don't set the binpref if a shell was provided. After all, that's only // going to affect what version of the shell is launched, not what fork of // the binary is launched. We insert "arch --arch <ARCH> as part of the diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp index c0b1079..9b7df10 100644 --- a/lldb/source/Host/windows/MainLoopWindows.cpp +++ b/lldb/source/Host/windows/MainLoopWindows.cpp @@ -55,11 +55,7 @@ public: if (m_monitor_thread.joinable()) { m_stopped = true; SetEvent(m_ready); - // Keep trying to cancel ReadFile() until the thread exits. 
- do { - CancelIoEx(m_handle, /*lpOverlapped=*/NULL); - } while (WaitForSingleObject(m_monitor_thread.native_handle(), 1) == - WAIT_TIMEOUT); + CancelIoEx(m_handle, /*lpOverlapped=*/NULL); m_monitor_thread.join(); } CloseHandle(m_event); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index 4b183a8..5588208 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -52,7 +52,7 @@ private: ValueObject *m_tree = nullptr; size_t m_num_elements = 0; ValueObject *m_next_element = nullptr; - std::vector<std::pair<ValueObject *, uint64_t>> m_elements_cache; + std::vector<ValueObject *> m_elements_cache; }; class LibCxxUnorderedMapIteratorSyntheticFrontEnd @@ -192,26 +192,25 @@ lldb::ValueObjectSP lldb_private::formatters:: return nullptr; } } - m_elements_cache.push_back( - {value_sp.get(), hash_sp->GetValueAsUnsigned(0)}); + m_elements_cache.push_back(value_sp.get()); m_next_element = node_sp->GetChildMemberWithName("__next_").get(); if (!m_next_element || m_next_element->GetValueAsUnsigned(0) == 0) m_next_element = nullptr; } - std::pair<ValueObject *, uint64_t> val_hash = m_elements_cache[idx]; - if (!val_hash.first) + ValueObject *val = m_elements_cache[idx]; + if (!val) return lldb::ValueObjectSP(); StreamString stream; stream.Printf("[%" PRIu64 "]", (uint64_t)idx); DataExtractor data; Status error; - val_hash.first->GetData(data, error); + val->GetData(data, error); if (error.Fail()) return lldb::ValueObjectSP(); const bool thread_and_frame_only_if_stopped = true; - ExecutionContext exe_ctx = val_hash.first->GetExecutionContextRef().Lock( - thread_and_frame_only_if_stopped); + ExecutionContext exe_ctx = + val->GetExecutionContextRef().Lock(thread_and_frame_only_if_stopped); return CreateValueObjectFromData(stream.GetString(), data, exe_ctx, m_element_type); } diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp index cc0c9e7..6d8f41a 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp @@ -14,6 +14,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/lldb-enumerations.h" +#include "llvm/ADT/Sequence.h" using namespace lldb; using namespace lldb_private; @@ -266,22 +267,47 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process, return true; } -bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, - lldb::addr_t relative_selector_base_addr, - bool is_small, bool has_direct_sel) { - size_t ptr_size = process->GetAddressByteSize(); - size_t size = GetSize(process, is_small); +llvm::SmallVector<ClassDescriptorV2::method_t, 0> +ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses, + lldb::addr_t relative_selector_base_addr, + bool is_small, bool has_direct_sel) const { + lldb_private::Process *process = m_runtime.GetProcess(); + if (!process) + return {}; - DataBufferHeap buffer(size, '\0'); - Status error; + const size_t size = method_t::GetSize(process, is_small); + const size_t num_methods = addresses.size(); - process->ReadMemory(addr, buffer.GetBytes(), size, error); - if (error.Fail()) { - return false; + llvm::SmallVector<uint8_t, 0>
buffer(num_methods * size, 0); + llvm::DenseSet<uint32_t> failed_indices; + + for (auto [idx, addr] : llvm::enumerate(addresses)) { + Status error; + process->ReadMemory(addr, buffer.data() + idx * size, size, error); + if (error.Fail()) + failed_indices.insert(idx); } - DataExtractor extractor(buffer.GetBytes(), size, process->GetByteOrder(), - ptr_size); + llvm::SmallVector<method_t, 0> methods; + methods.reserve(num_methods); + for (auto [idx, addr] : llvm::enumerate(addresses)) { + if (failed_indices.contains(idx)) + continue; + DataExtractor extractor(buffer.data() + idx * size, size, + process->GetByteOrder(), + process->GetAddressByteSize()); + methods.push_back(method_t()); + methods.back().Read(extractor, process, addr, relative_selector_base_addr, + is_small, has_direct_sel); + } + + return methods; +} + +bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor, + Process *process, lldb::addr_t addr, + lldb::addr_t relative_selector_base_addr, + bool is_small, bool has_direct_sel) { lldb::offset_t cursor = 0; if (is_small) { @@ -291,11 +317,11 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, m_name_ptr = addr + nameref_offset; + Status error; if (!has_direct_sel) { // The SEL offset points to a SELRef. We need to dereference twice. - m_name_ptr = process->ReadUnsignedIntegerFromMemory(m_name_ptr, ptr_size, - 0, error); - if (!error.Success()) + m_name_ptr = process->ReadPointerFromMemory(m_name_ptr, error); + if (error.Fail()) return false; } else if (relative_selector_base_addr != LLDB_INVALID_ADDRESS) { m_name_ptr = relative_selector_base_addr + nameref_offset; @@ -308,13 +334,13 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, m_imp_ptr = extractor.GetAddress_unchecked(&cursor); } + Status error; process->ReadCStringFromMemory(m_name_ptr, m_name, error); - if (error.Fail()) { + if (error.Fail()) return false; - } process->ReadCStringFromMemory(m_types_ptr, m_types, error); - return !error.Fail(); + return error.Success(); } bool ClassDescriptorV2::ivar_list_t::Read(Process *process, lldb::addr_t addr) { @@ -447,17 +473,19 @@ ClassDescriptorV2::GetMethodList(Process *process, bool ClassDescriptorV2::ProcessMethodList( std::function<bool(const char *, const char *)> const &instance_method_func, ClassDescriptorV2::method_list_t &method_list) const { - lldb_private::Process *process = m_runtime.GetProcess(); - auto method = std::make_unique<method_t>(); - lldb::addr_t relative_selector_base_addr = - m_runtime.GetRelativeSelectorBaseAddr(); - for (uint32_t i = 0, e = method_list.m_count; i < e; ++i) { - method->Read(process, method_list.m_first_ptr + (i * method_list.m_entsize), - relative_selector_base_addr, method_list.m_is_small, - method_list.m_has_direct_selector); - if (instance_method_func(method->m_name.c_str(), method->m_types.c_str())) + auto idx_to_method_addr = [&](uint32_t idx) { + return method_list.m_first_ptr + (idx * method_list.m_entsize); + }; + llvm::SmallVector<addr_t> addresses = llvm::to_vector(llvm::map_range( + llvm::seq<uint32_t>(method_list.m_count), idx_to_method_addr)); + + llvm::SmallVector<method_t, 0> methods = + ReadMethods(addresses, m_runtime.GetRelativeSelectorBaseAddr(), + method_list.m_is_small, method_list.m_has_direct_selector); + + for (const auto &method : methods) + if (instance_method_func(method.m_name.c_str(), method.m_types.c_str())) break; - } return true; } diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h 
b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h index 920a5eb..78b3311 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h @@ -172,11 +172,16 @@ private: + field_size; // IMP imp; } - bool Read(Process *process, lldb::addr_t addr, + bool Read(DataExtractor &extractor, Process *process, lldb::addr_t addr, lldb::addr_t relative_selector_base_addr, bool is_small, bool has_direct_sel); }; + llvm::SmallVector<method_t, 0> + ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses, + lldb::addr_t relative_selector_base_addr, bool is_small, + bool has_direct_sel) const; + struct ivar_list_t { uint32_t m_entsize; uint32_t m_count; diff --git a/lldb/test/API/macosx/mte/Makefile b/lldb/test/API/macosx/mte/Makefile index cb20942..d614e0f 100644 --- a/lldb/test/API/macosx/mte/Makefile +++ b/lldb/test/API/macosx/mte/Makefile @@ -1,12 +1,15 @@ C_SOURCES := main.c -EXE := uaf_mte +EXE := uaf -all: uaf_mte sign +binary-plain: uaf +binary-entitled: uaf sign + +all: binary-entitled include Makefile.rules -sign: mte-entitlements.plist uaf_mte +sign: mte-entitlements.plist uaf ifeq ($(OS),Darwin) codesign -s - -f --entitlements $^ endif diff --git a/lldb/test/API/macosx/mte/TestDarwinMTE.py b/lldb/test/API/macosx/mte/TestDarwinMTE.py index 489e24a..a70b4b4 100644 --- a/lldb/test/API/macosx/mte/TestDarwinMTE.py +++ b/lldb/test/API/macosx/mte/TestDarwinMTE.py @@ -7,13 +7,25 @@ from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil import lldbsuite.test.cpu_feature as cpu_feature -exe_name = "uaf_mte" # Must match Makefile +exe_name = "uaf" # Must match Makefile class TestDarwinMTE(TestBase): NO_DEBUG_INFO_TESTCASE = True @skipUnlessFeature(cpu_feature.AArch64.MTE) + def test_process_launch_memory_tagging(self): + self.build(make_targets=["binary-plain"]) + self.createTestTarget(self.getBuildArtifact(exe_name)) + + self.expect("process launch", substrs=["exited with status = 0"]) + + self.expect( + "process launch --memory-tagging", + substrs=["stopped", "stop reason = EXC_ARM_MTE_TAG_FAULT"], + ) + + @skipUnlessFeature(cpu_feature.AArch64.MTE) def test_tag_fault(self): self.build() exe = self.getBuildArtifact(exe_name) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index a4d110f..402fd05 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -4172,7 +4172,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". "Image", or "Pipe". This may be more restrictive than indicated by "AccQual" to reflect what the - kernel actual does. If not + kernel actually does. If not present then the runtime must assume what is implied by "AccQual" and "IsConst". Values @@ -5436,8 +5436,8 @@ The fields used by CP for code objects before V3 also match those specified in ``COMPUTE_PGM_RSRC1.PRIORITY``. 13:12 2 bits FLOAT_ROUND_MODE_32 Wavefront starts execution with specified rounding - mode for single (32 - bit) floating point + mode for single (32-bit) + floating point precision floating point operations. 
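The kernel descriptor table above places FLOAT_ROUND_MODE_32 in bits 13:12 of COMPUTE_PGM_RSRC1. A small sketch of how such a two-bit field is read and written; the register value is made up, and only the shift and mask follow the table:

#include <cassert>
#include <cstdint>

// FLOAT_ROUND_MODE_32 occupies bits 13:12 per the table above.
constexpr uint32_t kRoundModeShift = 12;
constexpr uint32_t kRoundModeMask = 0x3; // two-bit field

constexpr uint32_t getFloatRoundMode32(uint32_t Rsrc1) {
  return (Rsrc1 >> kRoundModeShift) & kRoundModeMask;
}

constexpr uint32_t setFloatRoundMode32(uint32_t Rsrc1, uint32_t Mode) {
  return (Rsrc1 & ~(kRoundModeMask << kRoundModeShift)) |
         ((Mode & kRoundModeMask) << kRoundModeShift);
}

int main() {
  uint32_t Rsrc1 = 0; // placeholder encoding, not a real descriptor
  Rsrc1 = setFloatRoundMode32(Rsrc1, 2);
  assert(getFloatRoundMode32(Rsrc1) == 2);
  return 0;
}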
@@ -5769,7 +5769,7 @@ The fields used by CP for code objects before V3 also match those specified in Wavefront starts execution with memory violation - exceptions exceptions + exceptions enabled which are generated when a memory violation has occurred for this wavefront from @@ -6005,7 +6005,7 @@ The fields used by CP for code objects before V3 also match those specified in FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush ====================================== ===== ==================================== - Denormal flushing is sign respecting. i.e. the behavior expected by + Denormal flushing is sign respecting, i.e., the behavior expected by ``"denormal-fp-math"="preserve-sign"``. The behavior is undefined with ``"denormal-fp-math"="positive-zero"`` @@ -16831,7 +16831,7 @@ For GFX125x: * Some memory operations contain a ``nv`` bit, for "non-volatile", which indicates memory that is not expected to change during a kernel's execution. This information is propagated to the cache lines for that address - (refered to as ``$nv``). + (referred to as ``$nv``). * When ``nv=0`` reads hit dirty ``$nv=1`` data in cache, the hardware will writeback the data to the next level in the hierarchy and then subsequently read @@ -18970,7 +18970,7 @@ On entry to a function: #. All other registers are unspecified. #. Any necessary ``s_waitcnt`` has been performed to ensure memory is available to the function. -#. Use pass-by-reference (byref) in stead of pass-by-value (byval) for struct +#. Use pass-by-reference (byref) instead of pass-by-value (byval) for struct arguments in C ABI. Callee is responsible for allocating stack memory and copying the value of the struct if modified. Note that the backend still supports byval for struct arguments. @@ -20214,7 +20214,7 @@ from the value of the ``-mcpu`` option that is passed to the assembler. .amdgpu_hsa_kernel (name) +++++++++++++++++++++++++ -This directives specifies that the symbol with given name is a kernel entry +This directive specifies that the symbol with given name is a kernel entry point (label) and the object should contain corresponding symbol of type STT_AMDGPU_HSA_KERNEL. diff --git a/llvm/docs/GettingStartedVS.rst b/llvm/docs/GettingStartedVS.rst index bc5746d..e65fd8f 100644 --- a/llvm/docs/GettingStartedVS.rst +++ b/llvm/docs/GettingStartedVS.rst @@ -126,6 +126,15 @@ These instructions were tested with Visual Studio 2019 and Python 3.9.6: cmake -S llvm\llvm -B build -DLLVM_ENABLE_PROJECTS=clang -DLLVM_TARGETS_TO_BUILD=X86 -Thost=x64 exit + .. note:: + By default, the Visual Studio project files generated by CMake use the + 32-bit toolset. If you are developing on a 64-bit version of Windows and + want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when + generating the Visual Studio solution. This requires CMake 3.8.0 or later. + + For Windows on Arm the equivalent is ``-Thost=ARM64``, but this is the default + for those hosts, so you do not have to use this option. + ``LLVM_ENABLE_PROJECTS`` specifies any additional LLVM projects you want to build while ``LLVM_TARGETS_TO_BUILD`` selects the compiler targets. If ``LLVM_TARGETS_TO_BUILD`` is omitted by default all targets are built @@ -149,10 +158,6 @@ These instructions were tested with Visual Studio 2019 and Python 3.9.6: * CMake generates project files for all build types. To select a specific build type, use the Configuration manager from the VS IDE or the ``/property:Configuration`` command-line option when using MSBuild.
- * By default, the Visual Studio project files generated by CMake use the - 32-bit toolset. If you are developing on a 64-bit version of Windows and - want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when - generating the Visual Studio solution. This requires CMake 3.8.0 or later. 13. Start Visual Studio and select configuration: diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h index a96535c..1bb07e39 100644 --- a/llvm/include/llvm/ADT/StringSwitch.h +++ b/llvm/include/llvm/ADT/StringSwitch.h @@ -54,17 +54,14 @@ public: explicit StringSwitch(StringRef S) : Str(S), Result() { } + StringSwitch(StringSwitch &&) = default; + // StringSwitch is not copyable. StringSwitch(const StringSwitch &) = delete; // StringSwitch is not assignable due to 'Str' being 'const'. void operator=(const StringSwitch &) = delete; - void operator=(StringSwitch &&other) = delete; - - StringSwitch(StringSwitch &&other) - : Str(other.Str), Result(std::move(other.Result)) { } - - ~StringSwitch() = default; + void operator=(StringSwitch &&) = delete; // Case-sensitive case matchers StringSwitch &Case(StringLiteral S, T Value) { diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index f06e7ce..bb7f3be 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -32,8 +32,11 @@ bool IsAnnotationOK(const GlobalVariable &GV); /// profile information and provides methods to operate on them. class StaticDataProfileInfo { public: - /// Accummulate the profile count of a constant that will be lowered to static - /// data sections. + /// A constant is tracked only if the following conditions are met. + /// 1) It has local (i.e., private or internal) linkage. + /// 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}. + /// 3) It's eligible for section prefix annotation. See `AnnotationKind` + /// above for ineligible reasons. DenseMap<const Constant *, uint64_t> ConstantProfileCounts; /// Keeps track of the constants that are seen at least once without profile @@ -44,6 +47,22 @@ public: LLVM_ABI std::optional<uint64_t> getConstantProfileCount(const Constant *C) const; + /// Use signed enums for enum value comparison, and make 'LukewarmOrUnknown' + /// 0 so any accidentally uninitialized value will default to unknown. + enum class StaticDataHotness : int8_t { + Cold = -1, + LukewarmOrUnknown = 0, + Hot = 1, + }; + + /// Return the hotness of the constant \p C based on its profile count \p + /// Count. + LLVM_ABI StaticDataHotness getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const; + + /// Return the string representation of the hotness enum \p Hotness.
+ LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const; + public: StaticDataProfileInfo() = default; diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 8dcc292..1cfcdbf 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -62,6 +62,7 @@ ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139) ELF_RELOC(R_AARCH64_PLT32, 0x13a) ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b) ELF_RELOC(R_AARCH64_PATCHINST, 0x13c) +ELF_RELOC(R_AARCH64_FUNCINIT64, 0x13d) // General dynamic TLS relocations ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200) ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201) diff --git a/llvm/include/llvm/CAS/CASID.h b/llvm/include/llvm/CAS/CASID.h index 8820994..f508ed3 100644 --- a/llvm/include/llvm/CAS/CASID.h +++ b/llvm/include/llvm/CAS/CASID.h @@ -95,8 +95,7 @@ public: } friend hash_code hash_value(const CASID &ID) { - ArrayRef<uint8_t> Hash = ID.getHash(); - return hash_combine_range(Hash.begin(), Hash.end()); + return hash_combine_range(ID.getHash()); } const CASContext &getContext() const { diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 1050b3d..c252f9d 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -229,8 +229,8 @@ public: /// doing something wrong if you call pruneValue directly on a /// LiveInterval. Indeed, you are supposed to call pruneValue on the main /// LiveRange and all the LiveRanges of the subranges if any. - LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex, - SmallVectorImpl<SlotIndex> *) { + [[maybe_unused]] void pruneValue(LiveInterval &, SlotIndex, + SmallVectorImpl<SlotIndex> *) { llvm_unreachable( "Use pruneValue on the main LiveRange and on each subrange"); } diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h index 77ce052..2c59a52 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -43,8 +43,14 @@ public: /// /// \param LDCS Flag to indicate whether we should load the call site /// information from DWARF `DW_TAG_call_site` entries - DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false) - : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {} + /// + /// \param MachO Flag to indicate if the object file is Mach-O (Apple's + /// executable format). Apple has some compile unit attributes that look like + /// split DWARF, but they aren't and they can cause warnings to be emitted + /// about missing DWO files. + DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false, + bool MachO = false) + : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS), IsMachO(MachO) {} /// Extract the DWARF from the supplied object file and convert it into the /// Gsym format in the GsymCreator object that is passed in.
Returns an @@ -97,6 +103,7 @@ private: DWARFContext &DICtx; GsymCreator &Gsym; bool LoadDwarfCallSites; + bool IsMachO; friend class DwarfTransformerTest; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h index 596cc18..b0197f0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h @@ -13,7 +13,6 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H #define LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H -#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/Support/Compiler.h" @@ -54,9 +53,6 @@ inline size_t numDeallocActions(const AllocActions &AAs) { AAs, [](const AllocActionCallPair &P) { return !!P.Dealloc; }); } -using OnRunFinalizeActionsCompleteFn = - unique_function<void(Expected<std::vector<WrapperFunctionCall>>)>; - /// Run finalize actions. /// /// If any finalize action fails then the corresponding dealloc actions will be @@ -67,16 +63,13 @@ using OnRunFinalizeActionsCompleteFn = /// be returned. The dealloc actions should be run by calling /// runDeallocationActions. If this function succeeds then the AA argument will /// be cleared before the function returns. -LLVM_ABI void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete); - -using OnRunDeallocActionsComeleteFn = unique_function<void(Error)>; +LLVM_ABI Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs); /// Run deallocation actions. /// Dealloc actions will be run in reverse order (from last element of DAs to /// first). 
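The hunk above turns the callback-based finalize/dealloc API into plain Expected and Error returns. A sketch of the calling convention those new signatures imply; the wrapper function below is not from the source, only the two calls and the error plumbing are:

#include "llvm/ExecutionEngine/Orc/Shared/AllocationActions.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::orc::shared;

// Run the finalize actions, then (in a real caller, much later) run the
// dealloc actions they produced. runFinalizeActions clears AAs on success.
Error finalizeThenDeallocate(AllocActions &AAs) {
  Expected<std::vector<WrapperFunctionCall>> DeallocActions =
      runFinalizeActions(AAs);
  if (!DeallocActions)
    return DeallocActions.takeError(); // some finalize action failed
  // ... use the finalized allocation here ...
  // Dealloc actions run in reverse order, as the comment above states.
  return runDeallocActions(*DeallocActions);
}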
-LLVM_ABI void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs, - OnRunDeallocActionsComeleteFn OnComplete); +LLVM_ABI Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs); using SPSAllocActionCallPair = SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>; diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 5a26e2f..e9a417d 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -833,6 +833,7 @@ struct BBAddrMap { bool MultiBBRange : 1; bool OmitBBEntries : 1; bool CallsiteEndOffsets : 1; + bool BBHash : 1; bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; } @@ -845,7 +846,8 @@ struct BBAddrMap { (static_cast<uint8_t>(BrProb) << 2) | (static_cast<uint8_t>(MultiBBRange) << 3) | (static_cast<uint8_t>(OmitBBEntries) << 4) | - (static_cast<uint8_t>(CallsiteEndOffsets) << 5); + (static_cast<uint8_t>(CallsiteEndOffsets) << 5) | + (static_cast<uint8_t>(BBHash) << 6); } // Decodes from minimum bit width representation and validates no @@ -854,7 +856,8 @@ struct BBAddrMap { Features Feat{ static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)), static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)), - static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))}; + static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5)), + static_cast<bool>(Val & (1 << 6))}; if (Feat.encode() != Val) return createStringError( std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x", @@ -864,10 +867,10 @@ struct BBAddrMap { bool operator==(const Features &Other) const { return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange, - OmitBBEntries, CallsiteEndOffsets) == + OmitBBEntries, CallsiteEndOffsets, BBHash) == std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb, Other.MultiBBRange, Other.OmitBBEntries, - Other.CallsiteEndOffsets); + Other.CallsiteEndOffsets, Other.BBHash); } }; @@ -920,17 +923,19 @@ struct BBAddrMap { false}; // Metdata for this basic block. // Offsets of end of call instructions, relative to the basic block start. SmallVector<uint32_t, 1> CallsiteEndOffsets; + uint64_t Hash = 0; // Hash for this basic block. 
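The Features::encode/decode pair above packs one flag per bit and rejects values with unknown bits by re-encoding, with the new BBHash flag appended at bit 6. The same round-trip idea in a standalone sketch, cut down to three flags; the struct is a simplified stand-in, not the real BBAddrMap type:

#include <cassert>
#include <cstdint>

// Simplified stand-in for BBAddrMap::Features: one bit per flag.
struct Features {
  bool FuncEntryCount = false;
  bool CallsiteEndOffsets = false;
  bool BBHash = false; // the newly appended flag, highest bit here

  uint8_t encode() const {
    return static_cast<uint8_t>(FuncEntryCount) |
           (static_cast<uint8_t>(CallsiteEndOffsets) << 1) |
           (static_cast<uint8_t>(BBHash) << 2);
  }

  // Returns false (where the real code returns an Error) on unknown bits.
  static bool decode(uint8_t Val, Features &Out) {
    Out = Features{static_cast<bool>(Val & (1 << 0)),
                   static_cast<bool>(Val & (1 << 1)),
                   static_cast<bool>(Val & (1 << 2))};
    return Out.encode() == Val; // re-encoding catches stray high bits
  }
};

int main() {
  Features F{true, false, true};
  Features RoundTrip;
  assert(Features::decode(F.encode(), RoundTrip));
  assert(RoundTrip.encode() == F.encode());
  assert(!Features::decode(0x80, RoundTrip)); // unknown bit 7 rejected
  return 0;
}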
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD, - SmallVector<uint32_t, 1> CallsiteEndOffsets) + SmallVector<uint32_t, 1> CallsiteEndOffsets, uint64_t Hash) : ID(ID), Offset(Offset), Size(Size), MD(MD), - CallsiteEndOffsets(std::move(CallsiteEndOffsets)) {} + CallsiteEndOffsets(std::move(CallsiteEndOffsets)), Hash(Hash) {} UniqueBBID getID() const { return {ID, 0}; } bool operator==(const BBEntry &Other) const { return ID == Other.ID && Offset == Other.Offset && Size == Other.Size && - MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets; + MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets && + Hash == Other.Hash; } bool hasReturn() const { return MD.HasReturn; } diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index c90591d..a7c7c7c 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -163,6 +163,7 @@ struct BBAddrMapEntry { llvm::yaml::Hex64 Size; llvm::yaml::Hex64 Metadata; std::optional<std::vector<llvm::yaml::Hex64>> CallsiteEndOffsets; + std::optional<llvm::yaml::Hex64> Hash; }; uint8_t Version; llvm::yaml::Hex8 Feature; diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h index 5b823db..ce91f62 100644 --- a/llvm/include/llvm/TableGen/CodeGenHelpers.h +++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h @@ -34,6 +34,21 @@ private: raw_ostream &OS; }; +// Simple RAII helper for emitting header include guard (ifndef-define-endif). +class IncludeGuardEmitter { +public: + IncludeGuardEmitter(raw_ostream &OS, StringRef Name) + : Name(Name.str()), OS(OS) { + OS << "#ifndef " << Name << "\n" + << "#define " << Name << "\n\n"; + } + ~IncludeGuardEmitter() { OS << "\n#endif // " << Name << "\n"; } + +private: + std::string Name; + raw_ostream &OS; +}; + // Simple RAII helper for emitting namespace scope. Name can be a single // namespace (empty for anonymous namespace) or nested namespace. class NamespaceEmitter { diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index b8c540c..9f8ac6e 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -849,17 +849,12 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, /// %sum.2 = select %cmp, %add, %sum.1 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { - SelectInst *SI = dyn_cast<SelectInst>(I); - if (!SI) - return InstDesc(false, I); - - CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); + Value *TrueVal, *FalseVal; // Only handle single use cases for now. - if (!CI || !CI->hasOneUse()) + if (!match(I, + m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal)))) return InstDesc(false, I); - Value *TrueVal = SI->getTrueValue(); - Value *FalseVal = SI->getFalseValue(); // Handle only when either of operands of select instruction is a PHI // node for now. 
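The isConditionalRdxPattern hunk above replaces a chain of dyn_casts with a single PatternMatch expression that binds both select arms while requiring a single-use compare as the condition. A minimal sketch of that matcher in isolation, assuming an LLVM development setup; the IR built here is just a stand-in select-of-compare:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::PatternMatch;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  auto *FT = FunctionType::get(I32, {I32, I32}, /*isVarArg=*/false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // Build: %sel = select (icmp slt %a, %b), %a, %b
  Value *A = F->getArg(0), *C = F->getArg(1);
  Value *Sel = B.CreateSelect(B.CreateICmpSLT(A, C), A, C);

  // The shape the new code matches: a select whose condition is a
  // single-use compare; m_Value binds the true and false arms.
  Value *TrueVal, *FalseVal;
  if (match(Sel, m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal),
                          m_Value(FalseVal))))
    outs() << "matched, true arm is arg0: " << (TrueVal == A) << "\n";
  return 0;
}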
if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) || @@ -886,7 +881,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { if (!IPhi || IPhi != FalseVal) return InstDesc(false, I); - return InstDesc(true, SI); + return InstDesc(true, I); } RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4e38626..e08ef60 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6644,7 +6644,7 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, "Invalid mask width"); // If index-width (mask size) is less than pointer-size then mask is // 1-extended. - if (match(Op1, m_PtrToInt(m_Specific(Op0)))) + if (match(Op1, m_PtrToIntOrAddr(m_Specific(Op0)))) return Op0; // NOTE: We may have attributes associated with the return value of the diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index ab37338..0b2e3fc 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -393,7 +393,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, /// \param AA The AliasAnalysis we used for our search. /// \param AllowImpreciseClobber Always false, unless we do relaxed verify. -LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, const MemoryLocation &StartLoc, const MemorySSA &MSSA, const UpwardsMemoryQuery &Query, BatchAAResults &AA, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 3fab6b0..a64b93d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6417,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S, case scSequentialUMinExpr: return GetGCDMultiple(cast<SCEVNAryExpr>(S)); case scUnknown: { - // ask ValueTracking for known bits + // Ask ValueTracking for known bits. A SCEVUnknown only becomes available + // at the point where its underlying IR value has been defined. If CtxI is + // not provided, use: + // * the first instruction in the entry block if the value is an argument + // * the defining instruction itself otherwise.
const SCEVUnknown *U = cast<SCEVUnknown>(S); + if (!CtxI) { + if (isa<Argument>(U->getValue())) + CtxI = &*F.getEntryBlock().begin(); + else if (auto *I = dyn_cast<Instruction>(U->getValue())) + CtxI = I; + } unsigned Known = computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT) .countMinTrailingZeros(); @@ -15761,6 +15771,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock( const SCEV *OneAlignedUp = GetNextSCEVDividesByDivisor(One, DividesBy); To = SE.getUMaxExpr(FromRewritten, OneAlignedUp); + } else { + if (LHS->getType()->isPointerTy()) { + LHS = SE.getLosslessPtrToIntExpr(LHS); + RHS = SE.getLosslessPtrToIntExpr(RHS); + if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS)) + break; + } + auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) { + const SCEV *Sub = SE.getMinusSCEV(A, B); + AddRewrite(Sub, Sub, + SE.getUMaxExpr(Sub, SE.getOne(From->getType()))); + }; + AddSubRewrite(LHS, RHS); + AddSubRewrite(RHS, LHS); + continue; } break; default: diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index 1f751ee..e7f0b2c 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount( OriginalCount = getInstrMaxCountValue(); } +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { + // The accumulated counter shows the constant is hot. Return enum 'hot' + // whether or not this variable is seen by unprofiled functions. + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; + // The constant is not hot, and is seen by unprofiled functions. We don't want to + // assign it to unlikely sections, even if the counter says 'cold'. So return + // enum 'LukewarmOrUnknown'. + if (ConstantWithoutCounts.count(C)) + return StaticDataHotness::LukewarmOrUnknown; + // The accumulated counter shows the constant is cold, so return enum 'cold'. + if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; + + return StaticDataHotness::LukewarmOrUnknown; +} + +StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataHotness::Cold: + return "unlikely"; + case StaticDataHotness::Hot: + return "hot"; + default: + return ""; + } +} + std::optional<uint64_t> StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { auto I = ConstantProfileCounts.find(C); @@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { StringRef StaticDataProfileInfo::getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); + std::optional<uint64_t> Count = getConstantProfileCount(C); if (!Count) return ""; - // The accummulated counter shows the constant is hot. Return 'hot' whether - // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; - // The constant is not hot, and seen by unprofiled functions. We don't want to - // assign it to unlikely sections, even if the counter says 'cold'. So return - // an empty prefix before checking whether the counter is cold. - if (ConstantWithoutCounts.count(C)) - return ""; - // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) - return "unlikely"; - // The counter says lukewarm.
Return an empty prefix. - return ""; + return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 219bbc9..05fffe9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1437,7 +1437,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, static_cast<bool>(BBAddrMapSkipEmitBBEntries), - HasCalls}; + HasCalls, + false}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4931403..53f1cfe2 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -770,7 +770,7 @@ struct PartwordMaskValues { Value *Inv_Mask = nullptr; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { auto PrintObj = [&O](auto *V) { if (V) diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 3812823..04d9309 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) { return getKnownBits(R).One; } -LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" @@ -2013,6 +2013,43 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; break; } + case TargetOpcode::G_ADD: { + Register Src2 = MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + if (Src2NumSignBits <= 2) + return 1; // Early out. + + Register Src1 = MI.getOperand(1).getReg(); + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits == 1) + return 1; // Early Out. + + // Special case decrementing a value (ADD X, -1): + KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth); + if (Known2.isAllOnes()) { + KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known1.Zero | 1).isAllOnes()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known1.isNonNegative()) { + FirstAnswer = Src1NumSignBits; + break; + } + + // Otherwise, we treat this like an ADD. + } + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. 
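+ // Worked example of this bound (illustrative numbers, not from the
+ // source): with 8-bit inputs, 0x03 = 0b00000011 and 0xFC = 0b11111100
+ // each have 6 sign bits; their sum is still guaranteed at least
+ // min(6, 6) - 1 = 5 identical top bits, because the single carry out of
+ // the low bits can disturb at most one of them.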
+ FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; + break; + } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { bool IsFP = Opcode == TargetOpcode::G_FCMP; diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 0e38017..d2f2c3e 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR, void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { LiveQueryResult LRQ = LR.Query(Kill); - VNInfo *VNI = LRQ.valueOutOrDead(); + // LR may have liveness reachable from an early clobber slot, which may be + // only live-in instead of live-out of the instruction. + // For example, if LR = [1r, 3r) and Kill = 3e, we have to prune [3e, 3r) of LR. + VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn(); if (!VNI) return; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 3268c26..9662511 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1551,7 +1551,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e153842..358e060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -658,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -5044,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); - ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef @@ -5076,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. - if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1)) return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); @@ -18591,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18636,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. @@ -18646,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18743,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -29744,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
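As a quick numeric check of the recurrence above, here is a plain scalar model of the one-constant iteration; the seed and iteration count are arbitrary stand-ins for the hardware estimate the combiner actually starts from:

#include <cmath>
#include <cstdio>

// Scalar model of buildSqrtNROneConst: Est = Est * (1.5 - HalfArg * Est * Est)
float rsqrtNewton(float A, float Est, unsigned Iterations) {
  const float HalfArg = 1.5f * A - A; // the single-constant 0.5 * A trick
  for (unsigned I = 0; I < Iterations; ++I)
    Est = Est * (1.5f - HalfArg * Est * Est);
  return Est;
}

int main() {
  const float A = 2.0f;
  float Est = rsqrtNewton(A, /*Est=*/0.7f, /*Iterations=*/3);
  std::printf("est=%f exact=%f\n", Est, 1.0f / std::sqrt(A));
  return 0;
}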
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29776,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29790,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29800,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29815,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29844,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? 
buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+              : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
 
   if (!Reciprocal) {
     SDLoc DL(Op);
     // Try the target specific test first.
@@ -29863,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
   return SDValue();
 }
 
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
-  return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+  return buildSqrtEstimateImpl(Op, true);
 }
 
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
-  return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+  return buildSqrtEstimateImpl(Op, false);
 }
 
 /// Return true if there is any possibility that the two addresses overlap.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c9aeef7..90edaf3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     break;
   case ISD::ADD:
   case ISD::ADDC:
-    // Add can have at most one carry bit. Thus we know that the output
-    // is, at worst, one more bit than the inputs.
+    // TODO: Move Operand 1 check before Operand 0 check
     Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Tmp == 1) return 1; // Early out.
@@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
     if (Tmp2 == 1) return 1; // Early out.
+
+    // Add can have at most one carry bit. Thus we know that the output
+    // is, at worst, one more bit than the inputs.
     return std::min(Tmp, Tmp2) - 1;
   case ISD::SUB:
     Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -6403,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
   if (VT.isScalableVector())
     return SDValue();
 
-  // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
-  // simplified to one big BUILD_VECTOR.
+  // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR, and
+  // single-element INSERT_VECTOR_ELT operands, can be simplified to one big
+  // BUILD_VECTOR.
   // FIXME: Add support for SCALAR_TO_VECTOR as well.
   EVT SVT = VT.getScalarType();
   SmallVector<SDValue, 16> Elts;
@@ -6414,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
       Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
     else if (Op.getOpcode() == ISD::BUILD_VECTOR)
       Elts.append(Op->op_begin(), Op->op_end());
+    else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+             OpVT.getVectorNumElements() == 1 &&
+             isNullConstant(Op.getOperand(2)))
+      Elts.push_back(Op.getOperand(1));
     else
       return SDValue();
   }
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7a0256f..fa39603 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
     if (FilePath.empty()) {
       // If we had a DW_AT_decl_file, but got no file then we need to emit a
       // warning.
+      const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+          Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+      // If there is no DW_AT_decl_file attribute at all, don't report an
+      // error.
+ if (DwarfFileIdx == UINT32_MAX) + return; Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) { - const uint64_t DwarfFileIdx = dwarf::toUnsigned( - Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has an invalid file index " << DwarfFileIdx << " in its DW_AT_decl_file attribute, unable to create a single " @@ -629,6 +633,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { size_t NumBefore = Gsym.getNumFunctionInfos(); auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); + // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF. + if (IsMachO) + return ReturnDie; + if (DwarfUnit.getDWOId()) { DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); if (!DWOCU->isDWOUnit()) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 5b3c05e..6c7e27e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -260,22 +260,17 @@ public: } // Run finalization actions. - using WrapperFunctionCall = orc::shared::WrapperFunctionCall; - runFinalizeActions( - G->allocActions(), - [this, OnFinalized = std::move(OnFinalized)]( - Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable { - completeFinalization(std::move(OnFinalized), - std::move(DeallocActions)); - }); - } + auto DeallocActions = runFinalizeActions(G->allocActions()); + if (!DeallocActions) { + OnFinalized(DeallocActions.takeError()); + return; + } - void abandon(OnAbandonedFunction OnAbandoned) override { - Error Err = Error::success(); - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); - if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); + // Release the finalize segments slab. + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { + OnFinalized(errorCodeToError(EC)); + return; + } #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -284,22 +279,17 @@ public: G = nullptr; #endif - OnAbandoned(std::move(Err)); + // Continue with finalized allocation. + OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), + std::move(*DeallocActions))); } -private: - void completeFinalization( - OnFinalizedFunction OnFinalized, - Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) { - - if (!DeallocActions) - return OnFinalized(DeallocActions.takeError()); - - // Release the finalize segments slab. - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { - OnFinalized(errorCodeToError(EC)); - return; - } + void abandon(OnAbandonedFunction OnAbandoned) override { + Error Err = Error::success(); + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); + if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -308,11 +298,10 @@ private: G = nullptr; #endif - // Continue with finalized allocation. 
- OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), - std::move(*DeallocActions))); + OnAbandoned(std::move(Err)); } +private: Error applyProtections() { for (auto &KV : BL.segments()) { const auto &AG = KV.first; diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 7b327af..7e606c6a 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size); } - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return OnInitialized(DeinitializeActionsOrErr.takeError()); - } + auto DeinitializeActions = shared::runFinalizeActions(AI.Actions); + if (!DeinitializeActions) + return OnInitialized(DeinitializeActions.takeError()); { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, // This is the maximum range whose permission have been possibly modified auto &Alloc = Allocations[MinAddr]; Alloc.Size = MaxAddr - MinAddr; - Alloc.DeinitializationActions = std::move(DeinitializeActions); + Alloc.DeinitializationActions = std::move(*DeinitializeActions); Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize( for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Reset protections to read/write so the area can be reused if (auto EC = sys::Memory::protectMappedMemory( diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp index 08ab0c6..91f2899 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp @@ -12,39 +12,31 @@ namespace llvm { namespace orc { namespace shared { -void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete) { +Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs) { std::vector<WrapperFunctionCall> DeallocActions; DeallocActions.reserve(numDeallocActions(AAs)); for (auto &AA : AAs) { if (AA.Finalize) - - if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) { - while (!DeallocActions.empty()) { - Err = joinErrors(std::move(Err), - DeallocActions.back().runWithSPSRetErrorMerged()); - DeallocActions.pop_back(); - } - return OnComplete(std::move(Err)); - } + if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) + return joinErrors(std::move(Err), runDeallocActions(DeallocActions)); if (AA.Dealloc) DeallocActions.push_back(std::move(AA.Dealloc)); } AAs.clear(); - OnComplete(std::move(DeallocActions)); + return DeallocActions; } -void runDeallocActions(ArrayRef<WrapperFunctionCall> 
DAs, - OnRunDeallocActionsComeleteFn OnComplete) { +Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) { Error Err = Error::success(); while (!DAs.empty()) { Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged()); DAs = DAs.drop_back(); } - OnComplete(std::move(Err)); + return Err; } } // namespace shared diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 8c24b1f..4fbf232 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -9,10 +9,8 @@ #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h" #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Process.h" #include "llvm/Support/WindowsError.h" -#include <future> #include <sstream> #if defined(LLVM_ON_UNIX) @@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( } // Run finalization actions and get deinitlization action list. - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return DeinitializeActionsOrErr.takeError(); + auto DeinitializeActions = shared::runFinalizeActions(FR.Actions); + if (!DeinitializeActions) { + return DeinitializeActions.takeError(); } { std::lock_guard<std::mutex> Lock(Mutex); Allocations[MinAddr].DeinitializationActions = - std::move(DeinitializeActions); + std::move(*DeinitializeActions); Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize( std::lock_guard<std::mutex> Lock(Mutex); for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - if (Err) - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Remove the allocation from the allocation list of its reservation for (auto &Reservation : Reservations) { diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 3842b1a..6a9ef2e 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -741,7 +741,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, assert(!CI2->isZero() && "And zero handled above"); if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { // If and'ing the address of a global with a constant, fold it. 
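        // For example (illustrative IR, @g hypothetical): if @g is declared
        // with `align 4`, the low two bits of its address are known zero, so
        //   and i64 (ptrtoint (ptr @g to i64)), 3  -->  0
        // The change below extends the same fold to ptrtoaddr expressions.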
- if (CE1->getOpcode() == Instruction::PtrToInt && + if ((CE1->getOpcode() == Instruction::PtrToInt || + CE1->getOpcode() == Instruction::PtrToAddr) && isa<GlobalValue>(CE1->getOperand(0))) { GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0)); diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp index eef8a21..6b7b4f1 100644 --- a/llvm/lib/ObjCopy/ConfigManager.cpp +++ b/llvm/lib/ObjCopy/ConfigManager.cpp @@ -122,14 +122,14 @@ ConfigManager::getDXContainerConfig() const { if (!Common.AddGnuDebugLink.empty() || !Common.SplitDWO.empty() || !Common.AllocSectionsPrefix.empty() || Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() || - !Common.DumpSection.empty() || !Common.KeepSection.empty() || - !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || - Common.ExtractDWO || Common.OnlyKeepDebug || Common.StripAllGNU || - Common.StripDWO || Common.StripDebug || Common.StripNonAlloc || - Common.StripSections || Common.StripUnneeded || - Common.DecompressDebugSections || Common.GapFill != 0 || - Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0 || + !Common.KeepSection.empty() || !Common.SectionsToRename.empty() || + !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.OnlyKeepDebug || Common.StripAllGNU || Common.StripDWO || + Common.StripDebug || Common.StripNonAlloc || Common.StripSections || + Common.StripUnneeded || Common.DecompressDebugSections || + Common.GapFill != 0 || Common.PadTo != 0 || + Common.ChangeSectionLMAValAll != 0 || !Common.ChangeSectionAddress.empty()) { return createStringError(llvm::errc::invalid_argument, "option is not supported for DXContainer"); diff --git a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp index d7f3c0d..95ab3d9 100644 --- a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp +++ b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp @@ -9,8 +9,10 @@ #include "llvm/ObjCopy/DXContainer/DXContainerObjcopy.h" #include "DXContainerReader.h" #include "DXContainerWriter.h" +#include "llvm/BinaryFormat/DXContainer.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/DXContainer/DXContainerConfig.h" +#include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -42,7 +44,47 @@ static Error extractPartAsObject(StringRef PartName, StringRef OutFilename, "part '%s' not found", PartName.str().c_str()); } +static Error dumpPartToFile(StringRef PartName, StringRef Filename, + StringRef InputFilename, Object &Obj) { + auto PartIter = llvm::find_if( + Obj.Parts, [&PartName](const Part &P) { return P.Name == PartName; }); + if (PartIter == Obj.Parts.end()) + return createFileError(Filename, + std::make_error_code(std::errc::invalid_argument), + "part '%s' not found", PartName.str().c_str()); + ArrayRef<uint8_t> Contents = PartIter->Data; + // The DXContainer format is a bit odd because the part-specific headers are + // contained inside the part data itself. For parts that contain LLVM bitcode + // when we dump the part we want to skip the part-specific header so that we + // get a valid .bc file that we can inspect. All the data contained inside the + // program header is pulled out of the bitcode, so the header can be + // reconstructed if needed from the bitcode itself. 
More comprehensive + // documentation on the DXContainer format can be found at + // https://llvm.org/docs/DirectX/DXContainer.html. + + if (PartName == "DXIL" || PartName == "STAT") + Contents = Contents.drop_front(sizeof(llvm::dxbc::ProgramHeader)); + if (Contents.empty()) + return createFileError(Filename, object_error::parse_failed, + "part '%s' is empty", PartName.str().c_str()); + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Filename, Contents.size()); + if (!BufferOrErr) + return createFileError(Filename, BufferOrErr.takeError()); + std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); + llvm::copy(Contents, Buf->getBufferStart()); + if (Error E = Buf->commit()) + return createFileError(Filename, std::move(E)); + return Error::success(); +} + static Error handleArgs(const CommonConfig &Config, Object &Obj) { + for (StringRef Flag : Config.DumpSection) { + auto [SecName, FileName] = Flag.split("="); + if (Error E = dumpPartToFile(SecName, FileName, Config.InputFilename, Obj)) + return E; + } + // Extract all sections before any modifications. for (StringRef Flag : Config.ExtractSection) { StringRef SectionName; diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 53699ce0..f256e7b 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -837,7 +837,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, Version = Data.getU8(Cur); if (!Cur) break; - if (Version < 2 || Version > 3) + if (Version < 2 || Version > 4) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast<int>(Version))); Feature = Data.getU8(Cur); // Feature byte @@ -852,6 +852,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, "callsite offsets feature is enabled: version = " + Twine(static_cast<int>(Version)) + " feature = " + Twine(static_cast<int>(Feature))); + if (FeatEnable.BBHash && Version < 4) + return createError("version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when " + "basic block hash feature is enabled: version = " + + Twine(static_cast<int>(Version)) + + " feature = " + Twine(static_cast<int>(Feature))); uint32_t NumBlocksInBBRange = 0; uint32_t NumBBRanges = 1; typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0; @@ -907,6 +912,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) + LastCallsiteEndOffset; uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr); + uint64_t Hash = FeatEnable.BBHash ? Data.getU64(Cur) : 0; Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr = BBAddrMap::BBEntry::Metadata::decode(MD); if (!MetadataOrErr) { @@ -914,7 +920,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, break; } BBEntries.push_back({ID, Offset + PrevBBEndOffset, Size, - *MetadataOrErr, CallsiteEndOffsets}); + *MetadataOrErr, CallsiteEndOffsets, Hash}); PrevBBEndOffset += Offset + Size; } TotalNumBlocks += BBEntries.size(); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index faeeab3..8b75fbe 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,7 +1465,7 @@ void ELFState<ELFT>::writeSectionContent( for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. 
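      // Rough shape of one encoded entry at version 4 (mirroring the decoder
      // in ELF.cpp above; a sketch, field order abbreviated): a u8 version and
      // a u8 feature byte, then per basic block the ULEB128-encoded
      // ID/Offset/Size/Metadata fields, callsite end offsets when that feature
      // is enabled, and a fixed-width u64 hash when the BBHash bit is set.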
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 3) + if (E.Version > 4) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast<int>(E.Version) << "; encoding using the most recent version"; @@ -1526,6 +1526,12 @@ void ELFState<ELFT>::writeSectionContent( } SHeader.sh_size += CBA.writeULEB128(BBE.Size); SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); + if (FeatureOrErr->BBHash || BBE.Hash.has_value()) { + uint64_t Hash = + BBE.Hash.has_value() ? BBE.Hash.value() : llvm::yaml::Hex64(0); + CBA.write<uint64_t>(Hash, ELFT::Endianness); + SHeader.sh_size += 8; + } } } if (!PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index d9cce1e..421d6603 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1887,6 +1887,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping( IO.mapRequired("Size", E.Size); IO.mapRequired("Metadata", E.Metadata); IO.mapOptional("CallsiteEndOffsets", E.CallsiteEndOffsets); + IO.mapOptional("Hash", E.Hash); } void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping( diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index 804ff07..41f5187 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -54,9 +54,9 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) { return std::nullopt; } -LLVM_ATTRIBUTE_UNUSED static void -HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, - SmallVectorImpl<char> &Result) { +[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output, + size_t &OutputLength, + SmallVectorImpl<char> &Result) { // No space left in output buffer. Double the size of the underlying // memory in the SmallVectorImpl, adjust pointer and length and continue // the conversion. 
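A minimal sketch of the attribute migration applied in this and the following
hunks (helper names hypothetical): the LLVM_ATTRIBUTE_UNUSED portability macro
is replaced with standard C++17 [[maybe_unused]], which suppresses unused
warnings for declarations that are only referenced conditionally, e.g. from
asserts or debug logging:

#include <cassert>

// Referenced only inside assert(); NDEBUG builds would otherwise emit an
// unused-function warning for it.
[[maybe_unused]] static unsigned twiceForCheck(unsigned X) { return 2 * X; }

void sanityCheck() { assert(twiceForCheck(2) == 4 && "sanity"); }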
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp index 8d66348..6f8e091 100644 --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -476,7 +476,7 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) { std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) + 1; - LLVM_ATTRIBUTE_UNUSED static std::size_t Rows = + [[maybe_unused]] static std::size_t Rows = UnicodeNameToCodepointLargestNameSize + 1; std::vector<char> Distances( diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2c3870c..636d4f8a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) { Spec = AArch64::S_GOTPCREL; else if (Identifier == "plt") Spec = AArch64::S_PLT; + else if (Identifier == "funcinit") + Spec = AArch64::S_FUNCINIT; } if (Spec == AArch64::S_None) return Error(Loc, "invalid relocation specifier"); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a388216..892b8da 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup, } if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR) return ELF::R_AARCH64_AUTH_ABS64; + if (RefKind == AArch64::S_FUNCINIT) + return ELF::R_AARCH64_FUNCINIT64; return ELF::R_AARCH64_ABS64; } case AArch64::fixup_aarch64_add_imm12: diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 2b5cf34..bc090c6 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = { {AArch64::S_GOT, "GOT"}, {AArch64::S_GOTPCREL, "GOTPCREL"}, {AArch64::S_PLT, "PLT"}, + {AArch64::S_FUNCINIT, "FUNCINIT"}, }; const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 0dfa61b..f2acff5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -164,6 +164,7 @@ enum { // ELF relocation specifiers in data directives: S_PLT = 0x400, S_GOTPCREL, + S_FUNCINIT, // Mach-O @ relocation specifiers: S_MACHO_GOT, diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index dd6fa16..d71f728 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) if (auto *CalledFunction = CB.getCalledFunction()) CalledFn = SMEAttrs(*CalledFunction, TLI); + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. 
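  // Illustrative IR for the case handled below (callee name hypothetical;
  // attribute spelling assumed from the SME ABI support):
  //   invoke void @callee() "aarch64_za_state_agnostic"
  //       to label %normal unwind label %lpad
  // If control resumes at %lpad instead of %normal, the agnostic-ZA
  // guarantees no longer hold, so the call is treated as private-ZA.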
+ if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. assert((IsIndirect || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 80e985d..a2841c11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { return CacheLineAlign; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static bool isCopyFromRegOfInlineAsm(const SDNode *N) { assert(N->getOpcode() == ISD::CopyFromReg); do { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a44a247..d516330 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC_LO) { - if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC_LO) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC) { // FIXME: Hack until VReg_1 removed. 
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp index 39e651d..8945ec3 100644 --- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp +++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp @@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, } // TODO -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index ca81d30..8ace2d2 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/MD5.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <cstdint> #include <optional> using namespace llvm; @@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) { dxbc::PSV::v2::ResourceBindInfo BindInfo; BindInfo.Type = Type; BindInfo.LowerBound = Binding.LowerBound; - BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1; + assert(Binding.Size == UINT32_MAX || + (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && + "Resource range is too large"); + BindInfo.UpperBound = (Binding.Size == UINT32_MAX) + ? UINT32_MAX + : Binding.LowerBound + Binding.Size - 1; BindInfo.Space = Binding.Space; BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind); BindInfo.Flags = Flags; diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 974f653..5f180d6 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable); } -LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. - static DecodeStatus - DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const MCDisassembler *Decoder) { +[[maybe_unused]] // Suppress warning temporarily. 
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg HvxVQRDecoderTable[] = { Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3, Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7}; diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index 5dde47a..a3296e0 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -419,8 +419,8 @@ namespace { using HCE = HexagonConstExtenders; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) @@ -435,8 +435,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &P) { if (P.Rs.Reg != 0) OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); else @@ -451,8 +451,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) { OS << "## " << (P.Ex.Neg ? "- " : "+ "); if (P.Ex.Rs.Reg != 0) OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); @@ -469,15 +469,15 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) { OS << '[' << P.ExtI.first << ", " << PrintExpr(P.ExtI.second, P.HRI) << ']'; return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) { assert(ED.OpNum != -1u); const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); const MachineFunction &MF = *MBB.getParent(); @@ -493,8 +493,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) { switch (ER.Kind) { case MachineOperand::MO_Immediate: OS << "imm:" << ER.V.ImmVal; @@ -527,8 +527,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) { OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; return OS; } @@ -540,8 +540,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) { OS << "{\n"; for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) { OS << " " << PrintInit(Q.first, P.HRI) << " -> {"; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 4d96cfa..c7a4f68 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -789,7 +789,7 @@ struct ShuffleMask { } }; -LLVM_ATTRIBUTE_UNUSED 
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) { SM.print(OS); return OS; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 87d052b..e4c0a16 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -364,7 +364,7 @@ private: const HexagonVectorCombine &HVC; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; @@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n'; @@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; @@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index fa8ae60..2ff5843 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -111,7 +111,7 @@ namespace { friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { const ChainOfDependences &CD = D.Chain; int ChainSize = CD.size(); @@ -144,7 +144,7 @@ namespace { bool isDefined() { return Inst2Replace != nullptr; } }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { OS << "** ReuseValue ***\n"; OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index ca98269..e3094b4 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -275,7 +275,7 @@ namespace HexagonII { INST_ICLASS_ALU32_3 = 0xf0000000 }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] static unsigned getMemAccessSizeInBytes(MemAccessSize S) { switch (S) { case ByteAccess: return 1; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 96ad5c6..0a8838c 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() { return new RISCVVLOptimizer(); } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { OI.print(OS); return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const std::optional<OperandInfo> &OI) { if (OI) 
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index a0cf881..5a06ea3 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -24,6 +24,7 @@ class SystemZTargetMachine; namespace SystemZ { // Condition-code mask values. +const unsigned CCMASK_NONE = 0; const unsigned CCMASK_0 = 1 << 3; const unsigned CCMASK_1 = 1 << 2; const unsigned CCMASK_2 = 1 << 1; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a..de28faf 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -15,6 +15,7 @@ #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +25,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { default: break; } + } else if (Constraint.size() == 5 && Constraint.starts_with("{")) { + if (StringRef("{@cc}").compare(Constraint) == 0) + return C_Other; } return TargetLowering::getConstraintType(Constraint); } @@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } + if (Constraint[1] == '@') { + if (StringRef("{@cc}").compare(Constraint) == 0) + return std::make_pair(0u, &SystemZ::GR32BitRegClass); + } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } @@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister( return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; } +// Convert condition code in CCReg to an i32 value. +static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +// Lower @cc targets via setcc. +SDValue SystemZTargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Glue, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { + if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0) + return SDValue(); + + // Check that return type is valid. 
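  // Illustrative use this lowering serves (variables hypothetical; the
  // source-level spelling is assumed to mirror GCC-style flag outputs):
  //   int cc;
  //   asm("cgr %1,%2" : "=@cc"(cc) : "r"(a), "r"(b));
  // The operand reaches here as ConstraintCode "{@cc}" and is materialized
  // by copying SystemZ::CC and extracting the 2-bit condition code via
  // IPM/SRL (see getCCResult above), so the declared result type must be an
  // integer wide enough for the values 0-3: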
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || + OpInfo.ConstraintVT.getSizeInBits() < 8) + report_fatal_error("Glue output operand is of invalid type"); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.addLiveIn(SystemZ::CC); + + if (Glue.getNode()) { + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue); + Chain = Glue.getValue(1); + } else + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32); + return getCCResult(DAG, Glue); +} + void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { @@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Convert condition code in CCReg to an i32 value. -static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { - SDLoc DL(CCReg); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); - return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); -} - SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC( return SDValue(); } -static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { +static std::pair<SDValue, int> findCCUse(const SDValue &Val) { + switch (Val.getOpcode()) { + default: + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + case SystemZISD::IPM: + if (Val.getOperand(0).getOpcode() == SystemZISD::CLC || + Val.getOperand(0).getOpcode() == SystemZISD::STRCMP) + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP); + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY); + case SystemZISD::SELECT_CCMASK: { + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg.getOpcode() == SystemZISD::ICMP || + Op4CCReg.getOpcode() == SystemZISD::TM) { + auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0)); + if (OpCC != SDValue()) + return std::make_pair(OpCC, OpCCValid); + } + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + if (!CCValid) + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + int CCValidVal = CCValid->getZExtValue(); + return std::make_pair(Op4CCReg, CCValidVal); + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0)); + if (Op0CC != SDValue()) + return std::make_pair(Op0CC, Op0CCValid); + return findCCUse(Val.getOperand(1)); + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG); + +SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC, + SelectionDAG &DAG) { + SDLoc DL(Val); + auto Opcode = Val.getOpcode(); + switch (Opcode) { + default: + return {}; + case ISD::Constant: + return {Val, Val, Val, Val}; + case SystemZISD::IPM: { + SDValue IPMOp0 = Val.getOperand(0); + if (IPMOp0 != CC) + return {}; + SmallVector<SDValue, 4> ShiftedCCVals; + for (auto CC : {0, 1, 2, 3}) + ShiftedCCVals.emplace_back( + DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32)); + return ShiftedCCVals; + } + case SystemZISD::SELECT_CCMASK: { + SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1); + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3)); + if (!CCValid || !CCMask) + return {}; + + int 
CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG); + const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG); + if (TrueSDVals.empty() || FalseSDVals.empty()) + return {}; + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg != CC) + combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG); + if (Op4CCReg != CC) + return {}; + SmallVector<SDValue, 4> MergedSDVals; + for (auto &CCVal : {0, 1, 2, 3}) + MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0) + ? TrueSDVals[CCVal] + : FalseSDVals[CCVal]); + return MergedSDVals; + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SRA: + // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA + // would clobber CC). + if (!Val.hasOneUse()) + return {}; + [[fallthrough]]; + case ISD::SHL: + case ISD::SRL: + SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1); + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) + return {}; + SmallVector<SDValue, 4> BinaryOpSDVals; + for (auto CCVal : {0, 1, 2, 3}) + BinaryOpSDVals.emplace_back(DAG.getNode( + Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal])); + return BinaryOpSDVals; + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, - // If the CCReg instruction is itself a ICMP testing the condition + // If the CCReg instruction is itself a ICMP / TM testing the condition // code set by some other instruction, see whether we can directly // use that condition code. - - // Verify that we have an ICMP against some constant. - if (CCValid != SystemZ::CCMASK_ICMP) - return false; - auto *ICmp = CCReg.getNode(); - if (ICmp->getOpcode() != SystemZISD::ICMP) - return false; - auto *CompareLHS = ICmp->getOperand(0).getNode(); - auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); - if (!CompareRHS) + auto *CCNode = CCReg.getNode(); + if (!CCNode) return false; - // Optimize the case where CompareLHS is a SELECT_CCMASK. - if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { - // Verify that we have an appropriate mask for a EQ or NE comparison. - bool Invert = false; - if (CCMask == SystemZ::CCMASK_CMP_NE) - Invert = !Invert; - else if (CCMask != SystemZ::CCMASK_CMP_EQ) + if (CCNode->getOpcode() == SystemZISD::TM) { + if (CCValid != SystemZ::CCMASK_TM) return false; - - // Verify that the ICMP compares against one of select values. - auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); - if (!TrueVal) - return false; - auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!FalseVal) + auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + auto Result = Op0APVal & Op1APVal; + bool AllOnes = Result == Op1APVal; + bool AllZeros = Result == 0; + bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0; + return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 
2 : 1; + }; + SDValue Op0 = CCNode->getOperand(0); + SDValue Op1 = CCNode->getOperand(1); + auto [Op0CC, Op0CCValid] = findCCUse(Op0); + if (Op0CC == SDValue()) return false; - if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue()) - Invert = !Invert; - else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue()) + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - - // Compute the effective CC mask for the new branch or select. - auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2)); - auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3)); - if (!NewCCValid || !NewCCMask) - return false; - CCValid = NewCCValid->getZExtValue(); - CCMask = NewCCMask->getZExtValue(); - if (Invert) - CCMask ^= CCValid; - - // Return the updated CCReg link. - CCReg = CompareLHS->getOperand(4); + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]); + if (CCVal < 0) + return false; + NewCCMask <<= 1; + NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0; + } + NewCCMask &= Op0CCValid; + CCReg = Op0CC; + CCMask = NewCCMask; + CCValid = Op0CCValid; return true; } + if (CCNode->getOpcode() != SystemZISD::ICMP || + CCValid != SystemZ::CCMASK_ICMP) + return false; - // Optimize the case where CompareRHS is (SRA (SHL (IPM))). - if (CompareLHS->getOpcode() == ISD::SRA) { - auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!SRACount || SRACount->getZExtValue() != 30) - return false; - auto *SHL = CompareLHS->getOperand(0).getNode(); - if (SHL->getOpcode() != ISD::SHL) - return false; - auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); - if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) - return false; - auto *IPM = SHL->getOperand(0).getNode(); - if (IPM->getOpcode() != SystemZISD::IPM) - return false; - - // Avoid introducing CC spills (because SRA would clobber CC). - if (!CompareLHS->hasOneUse()) - return false; - // Verify that the ICMP compares against zero. - if (CompareRHS->getZExtValue() != 0) + SDValue CmpOp0 = CCNode->getOperand(0); + SDValue CmpOp1 = CCNode->getOperand(1); + SDValue CmpOp2 = CCNode->getOperand(2); + auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0); + if (Op0CC != SDValue()) { + const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - // Compute the effective CC mask for the new branch or select. - CCMask = SystemZ::reverseCCMask(CCMask); - - // Return the updated CCReg link. - CCReg = IPM->getOperand(0); + auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2); + auto CmpTypeVal = CmpType->getZExtValue(); + const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val, + const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + if (CmpTypeVal == SystemZICMP::SignedOnly) + return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2; + return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 
1 : 2;
+    };
+    int NewCCMask = 0;
+    for (auto CC : {0, 1, 2, 3}) {
+      auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
+      if (CCVal < 0)
+        return false;
+      NewCCMask <<= 1;
+      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+    }
+    NewCCMask &= Op0CCValid;
+    CCMask = NewCCMask;
+    CCReg = Op0CC;
+    CCValid = Op0CCValid;
     return true;
   }
   return false;
 }
 
-SDValue SystemZTargetLowering::combineBR_CCMASK(
-    SDNode *N, DAGCombinerInfo &DCI) const {
+// Cost model for merging conditionals versus splitting them into multiple
+// branches.
+TargetLoweringBase::CondMergingParams
+SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
+                                                     const Value *Lhs,
+                                                     const Value *Rhs) const {
+  const auto isFlagOutOpCC = [](const Value *V) {
+    using namespace llvm::PatternMatch;
+    const Value *RHSVal;
+    const APInt *RHSC;
+    if (const auto *I = dyn_cast<Instruction>(V)) {
+      // PatternMatch.h provides concise tree-based pattern matching of LLVM IR.
+      if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
+          match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
+        if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
+          if (CB->isInlineAsm()) {
+            const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
+            return IA &&
+                   IA->getConstraintString().find("{@cc}") != std::string::npos;
+          }
+        }
+      }
+    }
+    return false;
+  };
+  // Pattern: (ICmp %asm) or (ICmp (And %asm)).
+  // The longest dependency chain (ICmp, And) has cost 2, so CostThreshold or
+  // BaseCost must be at least 2. If the instruction cost is <= CostThreshold,
+  // the conditionals are merged; otherwise they are split.
+  if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
+    return {3, 0, -1};
+  // Default.
+  return {-1, -1, -1};
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
   // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue Chain = N->getOperand(0);
   SDValue CCReg = N->getOperand(4);
-
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+  if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
     return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                        Chain,
                        DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
@@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue CCReg = N->getOperand(4);
 
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
-    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
-                       N->getOperand(0), N->getOperand(1),
-                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
-                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
-                       CCReg);
+  bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
+
+  // Populate the SDVals vector with the value selected for each condition
+  // code CCVal of the given Val, which may itself be another select_ccmask
+  // nested on the same CC.
+  const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
+    if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
+      SmallVector<SDValue, 4> Res;
+      if (Val.getOperand(4) != CCReg)
+        return SmallVector<SDValue, 4>{};
+      SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+      auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+      if (!CCMask)
+        return SmallVector<SDValue, 4>{};
+
+      int CCMaskVal = CCMask->getZExtValue();
+      for (auto &CC : {0, 1, 2, 3})
+        Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
+                                                              : FalseVal);
+      return Res;
+    }
+    return SmallVector<SDValue, 4>{Val, Val, Val, Val};
+  };
+  // Attempt to optimize TrueVal/FalseVal in the outermost select_ccmask,
+  // either with the CCReg found by combineCCMask or with the original CCReg.
+  SDValue TrueVal = N->getOperand(0);
+  SDValue FalseVal = N->getOperand(1);
+  auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
+  auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
+  // TrueSDVals/FalseSDVals might be empty in case of a non-constant
+  // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
+  if (TrueSDVals.empty())
+    TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
+  if (FalseSDVals.empty())
+    FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
+  if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
+    SmallSet<SDValue, 4> MergedSDValsSet;
+    // Ignore CC values outside CCValid.
+    for (auto CC : {0, 1, 2, 3}) {
+      if ((CCValidVal & ((1 << (3 - CC)))) != 0)
+        MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
+                                   ? TrueSDVals[CC]
+                                   : FalseSDVals[CC]);
+    }
+    if (MergedSDValsSet.size() == 1)
+      return *MergedSDValsSet.begin();
+    if (MergedSDValsSet.size() == 2) {
+      auto BeginIt = MergedSDValsSet.begin();
+      SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
+      if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
+        std::swap(NewTrueVal, NewFalseVal);
+      int NewCCMask = 0;
+      for (auto CC : {0, 1, 2, 3}) {
+        NewCCMask <<= 1;
+        NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
+                         ? (TrueSDVals[CC] == NewTrueVal)
+                         : (FalseSDVals[CC] == NewTrueVal);
+      }
+      CCMaskVal = NewCCMask;
+      CCMaskVal &= CCValidVal;
+      TrueVal = NewTrueVal;
+      FalseVal = NewFalseVal;
+      IsCombinedCCReg = true;
+    }
+  }
+
+  if (IsCombinedCCReg)
+    return DAG.getNode(
+        SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
+        FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+        DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
+
   return SDValue();
 }
 
-
 SDValue SystemZTargetLowering::combineGET_CCMASK(
     SDNode *N, DAGCombinerInfo &DCI) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f8706b7..d5b7603 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -533,6 +533,18 @@ public:
   }
 
   const char *getTargetNodeName(unsigned Opcode) const override;
+
+  // This function currently returns the cost of the srl/ipm/cc sequence used
+  // for merging.
+  CondMergingParams
+  getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
+                                const Value *Rhs) const override;
+
+  // Handle lowering of flag output operands in inline assembly.
+  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                      const SDLoc &DL,
+                                      const AsmOperandInfo &Constraint,
+                                      SelectionDAG &DAG) const override;
+
   std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                StringRef Constraint, MVT VT) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc..f973949 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
   MachineOperand Src = MI.getOperand(3);
   MachineOperand Len = MI.getOperand(4);
 
+  // If the length is a constant, we don't actually need the check.
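  // Illustrative effect (wasm text, names hypothetical): memcpy(dst, src, 16)
  // now lowers directly to
  //   (memory.copy (local.get $dst) (local.get $src) (i32.const 16))
  // with no zero-length guard block, while memcpy(dst, src, 0) is erased as a
  // no-op.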
+ if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) { + if (Def->getOpcode() == WebAssembly::CONST_I32 || + Def->getOpcode() == WebAssembly::CONST_I64) { + if (Def->getOperand(1).getImm() == 0) { + // A zero-length memcpy is a no-op. + MI.eraseFromParent(); + return BB; + } + // A non-zero-length memcpy doesn't need a zero check. + unsigned MemoryCopy = + Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32; + BuildMI(*BB, MI, DL, TII.get(MemoryCopy)) + .add(DstMem) + .add(SrcMem) + .add(Dst) + .add(Src) + .add(Len); + MI.eraseFromParent(); + return BB; + } + } + // We're going to add an extra use to `Len` to test if it's zero; that // use shouldn't be a kill, even if the original use is. MachineOperand NoKillLen = Len; @@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL, MachineOperand Val = MI.getOperand(2); MachineOperand Len = MI.getOperand(3); + // If the length is a constant, we don't actually need the check. + if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) { + if (Def->getOpcode() == WebAssembly::CONST_I32 || + Def->getOpcode() == WebAssembly::CONST_I64) { + if (Def->getOperand(1).getImm() == 0) { + // A zero-length memset is a no-op. + MI.eraseFromParent(); + return BB; + } + // A non-zero-length memset doesn't need a zero check. + unsigned MemoryFill = + Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32; + BuildMI(*BB, MI, DL, TII.get(MemoryFill)) + .add(Mem) + .add(Dst) + .add(Val) + .add(Len); + MI.eraseFromParent(); + return BB; + } + } + // We're going to add an extra use to `Len` to test if it's zero; that // use shouldn't be a kill, even if the original use is. MachineOperand NoKillLen = Len; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 28fa2cd..b81641f 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); + // fp constants getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 3af8b3e..2bf016a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1335,9 +1335,8 @@ def ProcessorFeatures { !listconcat(ARLFeatures, ARLSAdditionalFeatures); // Pantherlake - list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI]; list<SubtargetFeature> PTLFeatures = - !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]); + !listremove(ARLSFeatures, [FeatureWIDEKL]); // Clearwaterforest diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eea84a2..c32b1a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) BaseCost += 1; + + // For OR conditions with EQ comparisons, prefer splitting into branches + // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops, + // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed + // comparisons (SLT, SGT) that can be optimized. 
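+  // For example (illustrative IR): without CCMP,
+  //   %c1 = icmp eq i64 %a, 0
+  //   %c2 = icmp eq i64 %b, 0
+  //   %r = or i1 %c1, %c2
+  // has no cheap bitwise equivalent, so two branches are preferable, whereas
+  // the NE form (%a != 0 || %b != 0) folds to (%a | %b) != 0 and is worth
+  // keeping merged.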
+ if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or && + match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && + match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) + return {-1, -1, -1}; + return {BaseCost, BrMergingLikelyBias.getValue(), BrMergingUnlikelyBias.getValue()}; } @@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { /// Return true if every element in Mask, is an in-place blend/select mask or is /// undef. -LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) { +[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) { unsigned NumElts = Mask.size(); for (auto [I, M] : enumerate(Mask)) if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts)) @@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, return DstVec; } -LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { +[[maybe_unused]] static bool isHorizOp(unsigned Opcode) { switch (Opcode) { case X86ISD::PACKSS: case X86ISD::PACKUS: @@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // for DAG type consistency we have to match the FP operand type. APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); - LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. @@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // be generated by the memcmp expansion pass with oversized integer compares // (see PR33325). bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); - if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) + if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero) return SDValue(); // Don't perform this combine if constructing the vector will be expensive. diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index edca7c1..1932a3a 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -175,7 +175,7 @@ constexpr FeatureBitset FeaturesArrowlakeS = FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = - (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI; + (FeaturesArrowlakeS ^ FeatureWIDEKL); constexpr FeatureBitset FeaturesClearwaterforest = (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 6b67b48..09cb225 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2979,10 +2979,14 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op, "Op must be either i1 or vector of i1."); if (SI.getCondition()->getType() != Op->getType()) return nullptr; - if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) - return SelectInst::Create(Op, - IsAnd ? V : ConstantInt::getTrue(Op->getType()), - IsAnd ? 
ConstantInt::getFalse(Op->getType()) : V); + if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) { + Instruction *MDFrom = nullptr; + if (!ProfcheckDisableMetadataFixes) + MDFrom = &SI; + return SelectInst::Create( + Op, IsAnd ? V : ConstantInt::getTrue(Op->getType()), + IsAnd ? ConstantInt::getFalse(Op->getType()) : V, "", nullptr, MDFrom); + } return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index ff5f390..66e45ec 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -266,8 +266,7 @@ void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI, if (!ProfcheckDisableMetadataFixes) BI->setMetadata(LLVMContext::MD_prof, SI->getMetadata(LLVMContext::MD_prof)); - DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, - {DominatorTree::Insert, StartBlock, NewBlock}}); + DTU->applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}}); } else { BasicBlock *EndBlock = SIUse->getParent(); BasicBlock *NewBlockT = BasicBlock::Create( @@ -1479,10 +1478,13 @@ bool DFAJumpThreading::run(Function &F) { DTU->flush(); #ifdef EXPENSIVE_CHECKS - assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)); verifyFunction(F, &dbgs()); #endif + if (MadeChanges && VerifyDomInfo) + assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full) && + "Failed to maintain validity of domtree!"); + return MadeChanges; } diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp index 213d0f3..1335665 100644 --- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -39,10 +39,11 @@ public: private: AliasAnalysis *AA; }; +} // namespace /// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function, /// iterating until no more changes are made. -bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { +static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { bool Changed = false; bool LocalChange = true; @@ -67,7 +68,6 @@ bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { } return Changed; } -} // namespace char FlattenCFGLegacyPass::ID = 0; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index b9d332b..578fec7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -118,9 +118,13 @@ STATISTIC( STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +namespace llvm { /// Disable running mem2reg during SROA in order to test or debug SROA. static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden); +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // namespace llvm + namespace { class AllocaSliceRewriter; @@ -1777,7 +1781,8 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, } Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, - LI.getName() + ".sroa.speculated"); + LI.getName() + ".sroa.speculated", + ProfcheckDisableMetadataFixes ? 
nullptr : &SI); LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI.replaceAllUsesWith(V); @@ -2662,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); - V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); + // No profiling support for vector selects. + V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old, + DEBUG_TYPE, Name + "blend"); LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); return V; @@ -4360,10 +4367,13 @@ private: }; Value *Cond, *True, *False; + Instruction *MDFrom = nullptr; if (auto *SI = dyn_cast<SelectInst>(Sel)) { Cond = SI->getCondition(); True = SI->getTrueValue(); False = SI->getFalseValue(); + if (!ProfcheckDisableMetadataFixes) + MDFrom = SI; } else { Cond = Sel->getOperand(0); True = ConstantInt::get(Sel->getType(), 1); @@ -4383,8 +4393,12 @@ private: IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(), False->getName() + ".sroa.gep", NW); - Value *NSel = - IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel"); + Value *NSel = MDFrom + ? IRB.CreateSelect(Cond, NTrue, NFalse, + Sel->getName() + ".sroa.sel", MDFrom) + : IRB.CreateSelectWithUnknownProfile( + Cond, NTrue, NFalse, DEBUG_TYPE, + Sel->getName() + ".sroa.sel"); Visited.erase(&GEPI); GEPI.replaceAllUsesWith(NSel); GEPI.eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2ee91a9..0f3978f 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include <cassert> #include <utility> @@ -321,7 +322,7 @@ class StructurizeCFG { void collectInfos(); - void insertConditions(bool Loops); + void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter); void simplifyConditions(); @@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() { } /// Insert the missing branch conditions -void StructurizeCFG::insertConditions(bool Loops) { +void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) { BranchVector &Conds = Loops ? LoopConds : Conditions; Value *Default = Loops ? BoolTrue : BoolFalse; - SSAUpdater PhiInserter; for (BranchInst *Term : Conds) { assert(Term->isConditional()); @@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) { BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); + unsigned Variable = PhiInserter.AddVariable("", Boolean); + PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent, + Default); BBPredicates &Preds = Loops ? 
LoopPreds[SuccFalse] : Predicates[SuccTrue]; @@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) { ParentInfo = PI; break; } - PhiInserter.AddAvailableValue(BB, PI.Pred); + PhiInserter.AddAvailableValue(Variable, BB, PI.Pred); Dominator.addAndRememberBlock(BB); } @@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) { CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); } else { if (!Dominator.resultIsRememberedBlock()) - PhiInserter.AddAvailableValue(Dominator.result(), Default); + PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default); - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + PhiInserter.AddUse(Variable, &Term->getOperandUse(0)); } } } @@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT, orderNodes(); collectInfos(); createFlow(); - insertConditions(false); - insertConditions(true); + + SSAUpdaterBulk PhiInserter; + insertConditions(false, PhiInserter); + insertConditions(true, PhiInserter); + PhiInserter.RewriteAndOptimizeAllUses(*DT); + setPhiValues(); simplifyHoistedPhis(); simplifyConditions(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8ca3bed..23f5623 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -24,12 +24,9 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H -#include "VPlanAnalysis.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" @@ -41,10 +38,11 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InstructionCost.h" -#include <algorithm> #include <cassert> #include <cstddef> +#include <functional> #include <string> +#include <utility> namespace llvm { @@ -346,13 +344,6 @@ public: /// Return the cost of the block. virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; - /// Return true if it is legal to hoist instructions into this block. - bool isLegalToHoistInto() { - // There are currently no constraints that prevent an instruction to be - // hoisted into a VPBlockBase. 
- return true; - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll new file mode 100644 index 0000000..ebab9f0 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s --data-layout="e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X64 %s +; RUN: opt < %s --data-layout="e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X32 %s + +declare void @useptr(ptr) + +define void @ptrtoaddr(ptr %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) { +; X64-LABEL: 'ptrtoaddr' +; X64-NEXT: Classifying expressions for: @ptrtoaddr +; X64-NEXT: %p0 = ptrtoaddr ptr %in to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: %p1 = ptrtoaddr ptr %in to i32 +; X64-NEXT: --> %p1 U: full-set S: full-set +; X64-NEXT: %p2 = ptrtoaddr ptr %in to i16 +; X64-NEXT: --> %p2 U: full-set S: full-set +; X64-NEXT: %p3 = ptrtoaddr ptr %in to i128 +; X64-NEXT: --> %p3 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr +; +; X32-LABEL: 'ptrtoaddr' +; X32-NEXT: Classifying expressions for: @ptrtoaddr +; X32-NEXT: %p0 = ptrtoaddr ptr %in to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: %p1 = ptrtoaddr ptr %in to i32 +; X32-NEXT: --> %p1 U: full-set S: full-set +; X32-NEXT: %p2 = ptrtoaddr ptr %in to i16 +; X32-NEXT: --> %p2 U: full-set S: full-set +; X32-NEXT: %p3 = ptrtoaddr ptr %in to i128 +; X32-NEXT: --> %p3 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr +; + %p0 = ptrtoaddr ptr %in to i64 + %p1 = ptrtoaddr ptr %in to i32 + %p2 = ptrtoaddr ptr %in to i16 + %p3 = ptrtoaddr ptr %in to i128 + store i64 %p0, ptr %out0 + store i32 %p1, ptr %out1 + store i16 %p2, ptr %out2 + store i128 %p3, ptr %out3 + ret void +} + +define void @ptrtoaddr_as1(ptr addrspace(1) %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) { +; X64-LABEL: 'ptrtoaddr_as1' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_as1 +; X64-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32 +; X64-NEXT: --> %p1 U: full-set S: full-set +; X64-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16 +; X64-NEXT: --> %p2 U: full-set S: full-set +; X64-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128 +; X64-NEXT: --> %p3 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_as1 +; +; X32-LABEL: 'ptrtoaddr_as1' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_as1 +; X32-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32 +; X32-NEXT: --> %p1 U: full-set S: full-set +; X32-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16 +; X32-NEXT: --> %p2 U: full-set S: full-set +; X32-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128 +; X32-NEXT: --> %p3 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_as1 +; + %p0 = ptrtoaddr ptr 
addrspace(1) %in to i64 + %p1 = ptrtoaddr ptr addrspace(1) %in to i32 + %p2 = ptrtoaddr ptr addrspace(1) %in to i16 + %p3 = ptrtoaddr ptr addrspace(1) %in to i128 + store i64 %p0, ptr %out0 + store i32 %p1, ptr %out1 + store i16 %p2, ptr %out2 + store i128 %p3, ptr %out3 + ret void +} + +define void @ptrtoaddr_of_bitcast(ptr %in, ptr %out0) { +; X64-LABEL: 'ptrtoaddr_of_bitcast' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast +; X64-NEXT: %in_casted = bitcast ptr %in to ptr +; X64-NEXT: --> %in U: full-set S: full-set +; X64-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast +; +; X32-LABEL: 'ptrtoaddr_of_bitcast' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast +; X32-NEXT: %in_casted = bitcast ptr %in to ptr +; X32-NEXT: --> %in U: full-set S: full-set +; X32-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast +; + %in_casted = bitcast ptr %in to ptr + %p0 = ptrtoaddr ptr %in_casted to i64 + store i64 %p0, ptr %out0 + ret void +} + +define void @ptrtoaddr_of_nullptr(ptr %out0) { +; ALL-LABEL: 'ptrtoaddr_of_nullptr' +; ALL-NEXT: Classifying expressions for: @ptrtoaddr_of_nullptr +; ALL-NEXT: %p0 = ptrtoaddr ptr null to i64 +; ALL-NEXT: --> %p0 U: full-set S: full-set +; ALL-NEXT: Determining loop execution counts for: @ptrtoaddr_of_nullptr +; + %p0 = ptrtoaddr ptr null to i64 + store i64 %p0, ptr %out0 + ret void +} + +define void @ptrtoaddr_of_gep(ptr %in, ptr %out0) { +; X64-LABEL: 'ptrtoaddr_of_gep' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_gep +; X64-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42 +; X64-NEXT: --> (42 + %in) U: full-set S: full-set +; X64-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep +; +; X32-LABEL: 'ptrtoaddr_of_gep' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_gep +; X32-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42 +; X32-NEXT: --> (42 + %in) U: full-set S: full-set +; X32-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep +; + %in_adj = getelementptr inbounds i8, ptr %in, i64 42 + %p0 = ptrtoaddr ptr %in_adj to i64 + store i64 %p0, ptr %out0 + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index acac2c9..0c1f37b 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -382,7 +382,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> 
{%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -393,7 +393,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -406,9 +406,9 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -419,7 +419,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -459,7 +459,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to 
i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -470,7 +470,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -483,9 +483,9 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -496,7 +496,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining 
loop execution counts for: @pr46786_c26_char_cmp_ops_swapped ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index 7ba422d..a477465c 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -578,22 +578,22 @@ define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] @@ -615,9 +615,9 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 @@ -644,9 +644,9 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr 
%end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start)<nuw><nsw>,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 @@ -677,22 +677,22 @@ define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr ; CHECK-NEXT: %c = call i1 @cond() ; CHECK-NEXT: --> %c U: full-set S: full-set ; CHECK-NEXT: %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir new file mode 100644 index 0000000..824ada1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir @@ -0,0 +1,278 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s + +--- +name: Cst +body: | + bb.1: + ; CHECK-LABEL: name: @Cst + ; CHECK-NEXT: %0:_ KnownBits:00000010 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00011000 SignBits:3 + ; CHECK-NEXT: %2:_ KnownBits:00011010 SignBits:3 + %0:_(s8) = G_CONSTANT i8 2 + %1:_(s8) = G_CONSTANT i8 24 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstZero +body: | + bb.1: + ; CHECK-LABEL: name: @CstZero + ; CHECK-NEXT: %0:_ KnownBits:00000001 SignBits:7 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:00000000 SignBits:8 + %0:_(s8) = G_CONSTANT i8 1 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstNegOne +body: | + bb.1: + ; CHECK-LABEL: name: @CstNegOne + ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:11111111 SignBits:8 + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstSeven +body: | + bb.1: + ; CHECK-LABEL: name: @CstSeven + ; CHECK-NEXT: %0:_ KnownBits:00001000 SignBits:4 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:00000111 SignBits:5 + %0:_(s8) = G_CONSTANT i8 8 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstNeg +body: | + bb.1: + ; CHECK-LABEL: name: @CstNeg + ; CHECK-NEXT: %0:_ KnownBits:11100000 SignBits:3 + ; CHECK-NEXT: %1:_ KnownBits:00000010 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:11100010 SignBits:3 + %0:_(s8) = G_CONSTANT i8 224 + %1:_(s8) = G_CONSTANT i8 2 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarVar +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarVar + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? 
SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = COPY $b1 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarRhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarRhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarNonNegative +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarNonNegative + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4 + ; CHECK-NEXT: %3:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:4 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 15 + %2:_(s8) = G_AND %0, %1 + %3:_(s8) = G_CONSTANT i8 255 + %4:_(s8) = G_ADD %2, %3 +... +--- +name: ScalarLhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarLhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ADD %1, %0 +... +--- +name: ScalarPartKnown +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarPartKnown + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4 + ; CHECK-NEXT: %3:_ KnownBits:00000101 SignBits:5 + ; CHECK-NEXT: %4:_ KnownBits:000????? SignBits:3 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 15 + %2:_(s8) = G_AND %0, %1 + %3:_(s8) = G_CONSTANT i8 5 + %4:_(s8) = G_ADD %2, %3 +... +--- +name: VectorCstZero +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCstZero + ; CHECK-NEXT: %0:_ KnownBits:0000000000000001 SignBits:15 + ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000001 SignBits:15 + ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %4:_ KnownBits:0000000000000000 SignBits:16 + %0:_(s16) = G_CONSTANT i16 1 + %1:_(s16) = G_CONSTANT i16 65535 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %4:_(<4 x s16>) = G_ADD %2, %3 +... +--- +name: VectorCstNegOne +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCstNegOne + ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16 + %0:_(s16) = G_CONSTANT i16 0 + %1:_(s16) = G_CONSTANT i16 65535 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %4:_(<4 x s16>) = G_ADD %2, %3 +... +--- +name: VectorVar +body: | + bb.1: + ; CHECK-LABEL: name: @VectorVar + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = COPY $d1 + %2:_(<4 x s16>) = G_ADD %0, %1 +... +--- +name: VectorRhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @VectorRhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????????????? 
SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ADD %2, %0 +... +--- +name: VectorNonNegative +body: | + bb.1: + ; CHECK-LABEL: name: @VectorNonNegative + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %5:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %6:_ KnownBits:???????????????? SignBits:8 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 255 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_AND %0, %2 + %4:_(s16) = G_CONSTANT i16 65535 + %5:_(<4 x s16>) = G_BUILD_VECTOR %4, %4, %4, %4 + %6:_(<4 x s16>) = G_ADD %3, %5 +... +--- +name: VectorLhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @VectorLhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ADD %0, %2 +... +--- +name: VectorPartKnown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorPartKnown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:0000000000101010 SignBits:10 + ; CHECK-NEXT: %5:_ KnownBits:0000000001001010 SignBits:9 + ; CHECK-NEXT: %6:_ KnownBits:000000000??01010 SignBits:9 + ; CHECK-NEXT: %7:_ KnownBits:0000000????????? SignBits:7 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 255 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_AND %0, %2 + %4:_(s16) = G_CONSTANT i16 42 + %5:_(s16) = G_CONSTANT i16 74 + %6:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %5, %4 + %7:_(<4 x s16>) = G_ADD %6, %3 +... +--- +name: VectorCst36 +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst36 + ; CHECK-NEXT: %0:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000110 SignBits:13 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %3:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %4:_ KnownBits:000000000000???? SignBits:12 + %0:_(s16) = G_CONSTANT i16 3 + %1:_(s16) = G_CONSTANT i16 6 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0 + %4:_(<4 x s16>) = G_ADD %2, %3 +... + +--- +name: VectorCst3unknown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst3unknown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? 
SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = COPY $h0 + %2:_(s16) = G_CONSTANT i16 3 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1 + %4:_(<4 x s16>) = G_ADD %0, %3 +... diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index dc88f94..cca190f 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -1774,3 +1774,88 @@ define i128 @combine_i128_sdiv_const100(i128 %x) { %1 = sdiv i128 %x, 100 ret i128 %1 } + +; The following only becomes an sdiv_by_one after type legalisation, after which +; the splatted scalar constant has a different type to the splat vector. This +; test verifies DAGCombiner does not care about this type difference. +define <16 x i16> @combine_vec_sdiv_by_one_obfuscated(<16 x i16> %x) "target-features"="+sve" { +; CHECK-SD-LABEL: combine_vec_sdiv_by_one_obfuscated: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: combine_vec_sdiv_by_one_obfuscated: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: movi v3.8h, #1 +; CHECK-GI-NEXT: smov w8, v0.h[0] +; CHECK-GI-NEXT: mov v3.h[0], v2.h[0] +; CHECK-GI-NEXT: smov w9, v3.h[0] +; CHECK-GI-NEXT: smov w16, v3.h[7] +; CHECK-GI-NEXT: sdiv w14, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[1] +; CHECK-GI-NEXT: smov w9, v3.h[1] +; CHECK-GI-NEXT: sdiv w15, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[2] +; CHECK-GI-NEXT: smov w9, v3.h[2] +; CHECK-GI-NEXT: sdiv w13, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[3] +; CHECK-GI-NEXT: smov w9, v3.h[3] +; CHECK-GI-NEXT: sdiv w12, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[4] +; CHECK-GI-NEXT: smov w9, v3.h[4] +; CHECK-GI-NEXT: sdiv w11, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[5] +; CHECK-GI-NEXT: smov w9, v3.h[5] +; CHECK-GI-NEXT: sdiv w10, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[6] +; CHECK-GI-NEXT: smov w9, v3.h[6] +; CHECK-GI-NEXT: movi v3.8h, #1 +; CHECK-GI-NEXT: smov w17, v3.h[0] +; CHECK-GI-NEXT: smov w18, v3.h[1] +; CHECK-GI-NEXT: smov w0, v3.h[2] +; CHECK-GI-NEXT: smov w1, v3.h[3] +; CHECK-GI-NEXT: smov w2, v3.h[4] +; CHECK-GI-NEXT: smov w3, v3.h[5] +; CHECK-GI-NEXT: sdiv w8, w8, w9 +; CHECK-GI-NEXT: smov w9, v0.h[7] +; CHECK-GI-NEXT: fmov s0, w14 +; CHECK-GI-NEXT: mov v0.h[1], w15 +; CHECK-GI-NEXT: smov w15, v1.h[6] +; CHECK-GI-NEXT: mov v0.h[2], w13 +; CHECK-GI-NEXT: sdiv w9, w9, w16 +; CHECK-GI-NEXT: smov w16, v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], w12 +; CHECK-GI-NEXT: smov w12, v1.h[7] +; CHECK-GI-NEXT: mov v0.h[4], w11 +; CHECK-GI-NEXT: sdiv w16, w16, w17 +; CHECK-GI-NEXT: smov w17, v1.h[1] +; CHECK-GI-NEXT: mov v0.h[5], w10 +; CHECK-GI-NEXT: mov v0.h[6], w8 +; CHECK-GI-NEXT: sdiv w17, w17, w18 +; CHECK-GI-NEXT: smov w18, v1.h[2] +; CHECK-GI-NEXT: fmov s2, w16 +; CHECK-GI-NEXT: smov w16, v3.h[6] +; CHECK-GI-NEXT: mov v0.h[7], w9 +; CHECK-GI-NEXT: sdiv w18, w18, w0 +; CHECK-GI-NEXT: smov w0, v1.h[3] +; CHECK-GI-NEXT: mov v2.h[1], w17 +; CHECK-GI-NEXT: sdiv w0, w0, w1 +; CHECK-GI-NEXT: smov w1, v1.h[4] +; CHECK-GI-NEXT: mov v2.h[2], w18 +; CHECK-GI-NEXT: sdiv w1, w1, w2 +; CHECK-GI-NEXT: smov w2, v1.h[5] +; CHECK-GI-NEXT: mov v2.h[3], w0 +; CHECK-GI-NEXT: sdiv w14, w2, w3 +; CHECK-GI-NEXT: mov v2.h[4], w1 +; CHECK-GI-NEXT: sdiv w13, w15, w16 +; CHECK-GI-NEXT: smov w15, v3.h[7] +; CHECK-GI-NEXT: mov v2.h[5], w14 +; CHECK-GI-NEXT: sdiv w10, w12, w15 +; CHECK-GI-NEXT: mov v2.h[6], w13 +; CHECK-GI-NEXT: mov v2.h[7], w10 +; CHECK-GI-NEXT: mov v1.16b, v2.16b +; CHECK-GI-NEXT: ret + %zero_and_ones = shufflevector <16 x i16> zeroinitializer, <16 x 
i16> splat (i16 1), <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + %div = sdiv <16 x i16> %x, %zero_and_ones + ret <16 x i16> %div +} diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll index b6dee97e..b8d6c88 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll @@ -732,6 +732,247 @@ exit: ret void } +; This example corresponds to: +; +; __arm_agnostic("sme_za_state") void try_catch_agnostic_za_invoke() +; { +; try { +; agnostic_za_call(); +; } catch(...) { +; } +; } +; +; In this example we preserve all SME state enabled by PSTATE.ZA using +; `__arm_sme_save` before agnostic_za_call(). This is because on all normal +; returns from an agnostic ZA function ZA state should be preserved. That means +; we need to make sure ZA state is saved in case agnostic_za_call() throws, and +; we need to restore ZA state after unwinding to the catch block. + +define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: try_catch_agnostic_za_invoke: +; CHECK: .Lfunc_begin5: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 28, .Lexception5 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: bl __arm_sme_state_size +; CHECK-NEXT: sub sp, sp, x0 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .Ltmp15: // EH_LABEL +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_sme_save +; CHECK-NEXT: bl agnostic_za_call +; CHECK-NEXT: .Ltmp16: // EH_LABEL +; CHECK-NEXT: .LBB5_1: // %exit +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_sme_restore +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %catch +; CHECK-NEXT: .Ltmp17: // EH_LABEL +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: b .LBB5_1 +; +; CHECK-SDAG-LABEL: try_catch_agnostic_za_invoke: +; CHECK-SDAG: .Lfunc_begin5: +; CHECK-SDAG-NEXT: .cfi_startproc +; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception5 +; CHECK-SDAG-NEXT: // %bb.0: // %entry +; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: mov x29, sp +; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 +; CHECK-SDAG-NEXT: .cfi_offset w19, -16 +; CHECK-SDAG-NEXT: .cfi_offset w30, -24 +; CHECK-SDAG-NEXT: .cfi_offset w29, -32 +; CHECK-SDAG-NEXT: bl __arm_sme_state_size +; CHECK-SDAG-NEXT: sub sp, sp, x0 +; CHECK-SDAG-NEXT: mov x19, sp +; CHECK-SDAG-NEXT: .Ltmp15: // EH_LABEL +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: bl agnostic_za_call +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: .Ltmp16: // EH_LABEL +; CHECK-SDAG-NEXT: .LBB5_1: // %exit +; CHECK-SDAG-NEXT: mov sp, x29 +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-SDAG-NEXT: ret +; CHECK-SDAG-NEXT: .LBB5_2: // %catch +; CHECK-SDAG-NEXT: .Ltmp17: // EH_LABEL +; CHECK-SDAG-NEXT: mov x1, x0 +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: mov x0, x1 +; CHECK-SDAG-NEXT: bl __cxa_begin_catch +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: bl __cxa_end_catch +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: b .LBB5_1 +entry: + invoke void @agnostic_za_call() + to label %exit unwind label %catch + +catch: + %eh_info = landingpad { ptr, i32 } + catch ptr null + %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0 + tail call ptr @__cxa_begin_catch(ptr %exception_ptr) + tail call void @__cxa_end_catch() + br label %exit + +exit: + ret void +} + +; This is the same `try_catch_agnostic_za_invoke`, but shows a lazy save would +; also need to be committed in a shared-ZA function calling an agnostic-ZA function. +define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: try_catch_inout_za_agnostic_za_callee: +; CHECK: .Lfunc_begin6: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 28, .Lexception6 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: msub x9, x8, x8, x9 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stp x9, x8, [x29, #-16] +; CHECK-NEXT: .Ltmp18: // EH_LABEL +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl agnostic_za_call +; CHECK-NEXT: .Ltmp19: // EH_LABEL +; CHECK-NEXT: .LBB6_1: // %exit +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: cbnz x8, .LBB6_3 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB6_3: // %exit +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_4: // %catch +; CHECK-NEXT: .Ltmp20: // EH_LABEL +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: b .LBB6_1 +; +; CHECK-SDAG-LABEL: try_catch_inout_za_agnostic_za_callee: +; CHECK-SDAG: .Lfunc_begin6: +; CHECK-SDAG-NEXT: .cfi_startproc +; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception6 +; CHECK-SDAG-NEXT: // %bb.0: // %entry +; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: mov x29, sp +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 +; CHECK-SDAG-NEXT: .cfi_offset w19, -16 +; CHECK-SDAG-NEXT: .cfi_offset w30, -24 +; CHECK-SDAG-NEXT: .cfi_offset w29, -32 +; CHECK-SDAG-NEXT: rdsvl x8, #1 +; CHECK-SDAG-NEXT: mov x9, sp +; CHECK-SDAG-NEXT: msub x9, x8, x8, x9 +; CHECK-SDAG-NEXT: mov sp, x9 +; CHECK-SDAG-NEXT: stp x9, x8, [x29, #-16] +; CHECK-SDAG-NEXT: .Ltmp18: // EH_LABEL +; CHECK-SDAG-NEXT: sub x19, x29, #16 +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl agnostic_za_call +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_2 +; CHECK-SDAG-NEXT: // %bb.1: // %entry +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_2: // %entry +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: .Ltmp19: // EH_LABEL +; CHECK-SDAG-NEXT: .LBB6_3: // %exit +; CHECK-SDAG-NEXT: mov sp, x29 +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-SDAG-NEXT: ret +; CHECK-SDAG-NEXT: .LBB6_4: // %catch +; CHECK-SDAG-NEXT: .Ltmp20: // EH_LABEL +; CHECK-SDAG-NEXT: mov x1, x0 +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_6 +; CHECK-SDAG-NEXT: // %bb.5: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_6: // %catch +; CHECK-SDAG-NEXT: mov x0, x1 +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl __cxa_begin_catch +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_8 +; CHECK-SDAG-NEXT: // %bb.7: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_8: // %catch +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl 
__cxa_end_catch +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_10 +; CHECK-SDAG-NEXT: // %bb.9: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_10: // %catch +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: b .LBB6_3 +entry: + invoke void @agnostic_za_call() + to label %exit unwind label %catch + +catch: + %eh_info = landingpad { ptr, i32 } + catch ptr null + %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0 + tail call ptr @__cxa_begin_catch(ptr %exception_ptr) + tail call void @__cxa_end_catch() + br label %exit + +exit: + ret void +} + declare ptr @__cxa_allocate_exception(i64) declare void @__cxa_throw(ptr, ptr, ptr) declare ptr @__cxa_begin_catch(ptr) @@ -742,3 +983,4 @@ declare void @may_throw() declare void @shared_za_call() "aarch64_inout_za" declare void @noexcept_shared_za_call() "aarch64_inout_za" declare void @shared_zt0_call() "aarch64_inout_zt0" +declare void @agnostic_za_call() "aarch64_za_state_agnostic" diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll index bea0310..70224fc 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll @@ -94,6 +94,18 @@ define void @main() #0 { %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0( i32 4, i32 0, i32 10, i32 5, ptr null) + + ; RWBuffer<float4> UnboundedArray[] : register(u10, space5) +; CHECK: - Type: UAVTyped +; CHECK: Space: 5 +; CHECK: LowerBound: 10 +; CHECK: UpperBound: 4294967295 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + ; RWBuffer<float4> Buf = BufferArray[100]; + %uav3 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 5, i32 10, i32 -1, i32 100, ptr null) ret void } diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll index bf31ccb..559cc53 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -32,6 +32,40 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v32i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v32i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: 
xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i16: ; CHECK: # %bb.0: @@ -49,6 +83,40 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + +define void @select_v16i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i32: ; CHECK: # %bb.0: @@ -65,19 +133,70 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i32> %v0, <8 x i32> %v1 + store <8 x i32> %sel, ptr %res + ret void +} + +define void @select_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %v0, <8 x 
float> %v1 + store <8 x float> %sel, ptr %res + ret void +} + define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI10_0) ; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x i64>, ptr %a0 %v1 = load <4 x i64>, ptr %a1 - %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1 + %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v0, <4 x i64> %v1 store <4 x i64> %sel, ptr %res ret void } + +define void @select_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v0, <4 x double> %v1 + store <4 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll index 8f25a6b..25c4f09 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll @@ -16,6 +16,20 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ret void } +define void @select_v16i8_imm_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: select_v16i8_imm_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, -256 +; CHECK-NEXT: vbitseli.b $vr1, $vr0, 1 +; CHECK-NEXT: vst $vr1, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %v0 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i8: ; CHECK: # %bb.0: @@ -32,6 +46,40 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + +define void @select_v16i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) +; 
CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i16: ; CHECK: # %bb.0: @@ -49,6 +97,40 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + +define void @select_v8i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI7_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI7_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i32: ; CHECK: # %bb.0: @@ -65,13 +147,47 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v4i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v0, <4 x i32> %v1 + store <4 x i32> %sel, ptr %res + ret void +} + +define void @select_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI10_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %sel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %v0, <4 x float> %v1 + store <4 x float> %sel, ptr %res + ret void +} + define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: 
vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI11_0) ; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret @@ -81,3 +197,20 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { store <2 x i64> %sel, ptr %res ret void } + +define void @select_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %sel = select <2 x i1> <i1 false, i1 true>, <2 x double> %v0, <2 x double> %v1 + store <2 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index e71f59c..cad684e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_afn_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB11_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_afn_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB13_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB13_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_fast_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, 
.LCPI15_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB15_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB15_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_fast_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB16_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB16_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ret float %rt diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll new file mode 100644 index 0000000..c19e93d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O1 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s + +define i32 @pr134424(i64 %input_value, i32 %base_value, i1 %cond_flag1, i1 %cond_flag2, i1 %cond_flag3) { +; CHECK-LABEL: pr134424: +; CHECK: # %bb.0: # %for.body.us.preheader.i +; CHECK-NEXT: andi a3, a3, 1 +; CHECK-NEXT: andi a5, a2, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 14 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bnez a5, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %for.body.us.preheader.i +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: .LBB0_2: # %for.body.us.preheader.i +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: andi a4, a4, 1 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bnez a3, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %for.body.us.preheader.i +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: .LBB0_4: # %for.body.us.preheader.i +; CHECK-NEXT: vmsle.vi v0, v8, 0 +; CHECK-NEXT: sext.w a2, a2 +; CHECK-NEXT: bnez a4, .LBB0_6 +; CHECK-NEXT: # %bb.5: # %for.body.us.preheader.i +; 
CHECK-NEXT: li a1, 1 +; CHECK-NEXT: .LBB0_6: # %for.body.us.preheader.i +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a3, v8 +; CHECK-NEXT: sext.w a1, a1 +; CHECK-NEXT: bge a3, a2, .LBB0_11 +; CHECK-NEXT: # %bb.7: # %for.body.us.preheader.i +; CHECK-NEXT: bge a0, a1, .LBB0_12 +; CHECK-NEXT: .LBB0_8: # %for.body.us.preheader.i +; CHECK-NEXT: blt a3, a0, .LBB0_10 +; CHECK-NEXT: .LBB0_9: # %for.body.us.preheader.i +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_10: # %for.body.us.preheader.i +; CHECK-NEXT: sw a3, 0(zero) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_11: # %for.body.us.preheader.i +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: blt a0, a1, .LBB0_8 +; CHECK-NEXT: .LBB0_12: # %for.body.us.preheader.i +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bge a3, a0, .LBB0_9 +; CHECK-NEXT: j .LBB0_10 +for.body.us.preheader.i: + %partial_vector = insertelement <4 x i64> zeroinitializer, i64 %input_value, i64 1 + %comparison_vector = shufflevector <4 x i64> %partial_vector, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 1, i32 1> + %comparison_result = icmp sle <4 x i64> %comparison_vector, zeroinitializer + %selected_value1 = select i1 %cond_flag1, i32 %base_value, i32 1 + %selected_value2 = select i1 %cond_flag2, i32 %base_value, i32 1 + %selected_value3 = select i1 %cond_flag3, i32 %base_value, i32 1 + %bool_to_int = zext <4 x i1> %comparison_result to <4 x i32> + %extended_vector = shufflevector <4 x i32> %bool_to_int, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> + %vector_min = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %extended_vector) + %min1 = call i32 @llvm.smin.i32(i32 %vector_min, i32 %selected_value1) + %min2 = call i32 @llvm.smin.i32(i32 %selected_value2, i32 %selected_value3) + %final_min = call i32 @llvm.smin.i32(i32 %min1, i32 %min2) + store i32 %final_min, ptr null, align 4 + ret i32 0 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir new file mode 100644 index 0000000..aeab8f6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir @@ -0,0 +1,57 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=register-coalescer -o - %s | FileCheck %s + +--- +name: pr71023 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: pr71023 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x10, $v8, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF + ; CHECK-NEXT: undef [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_2:vrn8m1 = PseudoVMV_V_I_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_6:vrn8m1 = COPY undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2 + ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + 
; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: early-clobber [[PseudoVMV_V_I_M1_]].sub_vrm1_0:vrn8m1 = PseudoVRGATHER_VI_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_0, [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSSEG6E8_V_M1_MASK [[PseudoVMV_V_I_M1_]].sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5, undef [[DEF]], killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1) + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $x10, $v8, $v10 + %0:gpr = IMPLICIT_DEF + %1:vrnov0 = PseudoVMV_V_I_M1 undef %1, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + %2:vrnov0 = IMPLICIT_DEF + undef %3.sub_vrm1_0:vrn6m1nov0 = COPY undef %1 + %3.sub_vrm1_3:vrn6m1nov0 = COPY %2 + %3.sub_vrm1_4:vrn6m1nov0 = COPY undef %1 + BNE undef %0, $x0, %bb.3 + PseudoBR %bb.1 + bb.1: + successors: %bb.3(0x40000000), %bb.2(0x40000000) + BNE killed undef %0, $x0, %bb.3 + PseudoBR %bb.2 + bb.2: + successors: %bb.3(0x80000000) + bb.3: + %4:vr = IMPLICIT_DEF + early-clobber %4:vr = PseudoVRGATHER_VI_M1 undef %4, killed %1, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + undef %5.sub_vrm1_0:vrn6m1 = COPY killed %4 + %5.sub_vrm1_5:vrn6m1 = COPY killed %2 + PseudoVSSEG6E8_V_M1_MASK killed %5, undef %0, killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1) + PseudoRET +... diff --git a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll index c6ee804..07fbed9 100644 --- a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll @@ -90,7 +90,7 @@ define i32 @test_tbegin_nofloat4(i32 %pad, ptr %ptr) { ; CHECK: tbegin 0, 65292 ; CHECK: ipm %r2 ; CHECK: srl %r2, 28 -; CHECK: ciblh %r2, 2, 0(%r14) +; CHECK: bnhr %r14 ; CHECK: mvhi 0(%r3), 0 ; CHECK: br %r14 %res = call i32 @llvm.s390.tbegin.nofloat(ptr null, i32 65292) @@ -219,7 +219,7 @@ define i32 @test_tend2(i32 %pad, ptr %ptr) { ; CHECK: tend ; CHECK: ipm %r2 ; CHECK: srl %r2, 28 -; CHECK: ciblh %r2, 2, 0(%r14) +; CHECK: bnhr %r14 ; CHECK: mvhi 0(%r3), 0 ; CHECK: br %r14 %res = call i32 @llvm.s390.tend() diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll new file mode 100644 index 0000000..6b8746e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll @@ -0,0 +1,738 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s +; Test implementation of combining br_ccmask for flag output operand, and +; optimizing ipm sequence using conditional branches. + +declare void @dummy() + +; Check a case where the cc is used as an integer. +; Just (srl (ipm)) sequence without optimization. 
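For readers new to this constraint, a breakdown of the recurring asm string (read off the IR in this file, plus the architectural IPM behavior; the bit positions come from the ISA definition, not from this patch):

;   "={@cc}"    - flag output: the condition code the asm leaves behind, as an i32 in 0..3
;   "=*QS,*QS"  - an indirect memory output plus an input for the value alsi updates (*%a, passed twice)
;   "~{memory}" - memory clobber
; When the cc value is consumed as a plain integer (as in @test below), it must
; be materialized:
;   ipm %r2        ; insert program mask: CC lands in bits 2-3 of the low word
;   srl %r2, 28    ; shift it down, leaving cc in {0,1,2,3}
; The remaining tests check that when the only users are compares feeding
; branches, this ipm/srl sequence folds away into a single branch on CC.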
+define i32 @test(ptr %a) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: srl %r2, 28 +; CHECK-NEXT: br %r14 + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + ret i32 %cc +} + +; Test-1(f1_0_*). Test all 14 valid combinations, where cc is being used for +; branching. + +; Check (cc == 0). +define void @f1_0_eq_0(ptr %a) { +; CHECK-LABEL: f1_0_eq_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jge dummy@PLT +; CHECK-NEXT: .LBB1_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 0). +define void @f1_0_ne_0(ptr %a) { +; CHECK-LABEL: f1_0_ne_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgne dummy@PLT +; CHECK-NEXT: .LBB2_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1). +define void @f1_0_eq_1(ptr %a) { +; CHECK-LABEL: f1_0_eq_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgl dummy@PLT +; CHECK-NEXT: .LBB3_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 1). +define void @f1_0_ne_1(ptr %a) { +; CHECK-LABEL: f1_0_ne_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB4_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ne i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 2). 
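For reference, the expected mnemonics in the checks below follow the usual SystemZ condition-mask spellings; the mapping can be read directly off the CHECK lines:

;   cc == 0 -> "e"     cc == 1 -> "l"     cc == 2 -> "h"     cc == 3 -> "o"
;   cc == 0|1 -> "le"  cc == 0|2 -> "he"  cc == 0|3 -> "nlh"
;   cc == 1|2 -> "lh"  cc == 1|3 -> "nhe" cc == 2|3 -> "nle"
; with "n<mask>" as the complement of "<mask>". "jg<mask>" is the long relative
; conditional branch (here a conditional tail call to @dummy); the select tests
; in the second file use the conditional-return form "b<mask>r %r14".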
+define void @f1_0_eq_2(ptr %a) { +; CHECK-LABEL: f1_0_eq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgh dummy@PLT +; CHECK-NEXT: .LBB5_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 2). +define void @f1_0_ne_2(ptr %a) { +; CHECK-LABEL: f1_0_ne_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB6_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ne i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 3). +define void @f1_0_eq_3(ptr %a) { +; CHECK-LABEL: f1_0_eq_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgo dummy@PLT +; CHECK-NEXT: .LBB7_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 3). +define void @f1_0_ne_3(ptr %a) { +; CHECK-LABEL: f1_0_ne_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgno dummy@PLT +; CHECK-NEXT: .LBB8_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ult i32 %cc, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|1). +define void @f1_0_01(ptr %a) { +; CHECK-LABEL: f1_0_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgle dummy@PLT +; CHECK-NEXT: .LBB9_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ult i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|2). 
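A sketch of the parity encoding the (cc == 0|2) test relies on, using the names from the IR that follows:

;   %and = and i32 %cc, 1       ; isolate the low bit of cc
;   %cmp = icmp eq i32 %and, 0  ; true for cc in {0,2}, false for {1,3}
; Testing the low bit selects the even CC values ("jghe"); its negation selects
; the odd ones ("jgnhe", f1_0_13 below). The later 'and/tm' tests exercise the
; same pattern via TEST UNDER MASK.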
+define void @f1_0_02(ptr %a) { +; CHECK-LABEL: f1_0_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jghe dummy@PLT +; CHECK-NEXT: .LBB10_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|3). +define void @f1_0_03(ptr %a) { +; CHECK-LABEL: f1_0_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnlh dummy@PLT +; CHECK-NEXT: .LBB11_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp0 = icmp ne i32 %cc, 0 + %cmp3 = icmp ne i32 %cc, 3 + %cmp.inv = and i1 %cmp0, %cmp3 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1|2). +define void @f1_0_12(ptr %a) { +; CHECK-LABEL: f1_0_12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB12_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq1 = icmp eq i32 %cc, 1 + %cmpeq2 = icmp eq i32 %cc, 2 + %cmp = or i1 %cmpeq1, %cmpeq2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1|3). +define void @f1_0_13(ptr %a) { +; CHECK-LABEL: f1_0_13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnhe dummy@PLT +; CHECK-NEXT: .LBB13_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq1 = icmp eq i32 %cc, 1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cmp = or i1 %cmpeq1, %cmpeq3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 2|3). +define void @f1_0_23(ptr %a) { +; CHECK-LABEL: f1_0_23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnle dummy@PLT +; CHECK-NEXT: .LBB14_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ugt i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Test-2(f1_1_*/f1_2_*/f1_3_*/f1_4_*). +; Test Mixed patterns involving Binary Ops. + +; Check 'add' for (cc != 0).
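The 'add' tests encode CC ranges through unsigned wraparound; a sketch of the reasoning for the first one, using the names from f1_1_1 below:

;   %add = add nsw i32 %cc, -1   ; with cc in [0,3]: 0 wraps to 4294967295, 1..3 -> 0..2
;   %cmp = icmp ult i32 %add, 3  ; true exactly for cc in {1,2,3}
; i.e. the pair is equivalent to (cc != 0) and folds to "jgne"; tightening the
; bound to "ult 2" (f1_1_2) selects cc in {1,2} and folds to "jglh".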
+define void @f1_1_1(ptr %a) { +; CHECK-LABEL: f1_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgne dummy@PLT +; CHECK-NEXT: .LBB15_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cmp = icmp ult i32 %add, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'add' for (cc == 1|2). +define void @f1_1_2(ptr %a) { +; CHECK-LABEL: f1_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB16_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cmp = icmp ult i32 %add, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'add' for (cc == 1|2). +define void @f1_1_3(ptr %a) { +; CHECK-LABEL: f1_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB17_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cmp.inv = icmp ult i32 %add, -2 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define void @f1_2_1(ptr %a) { +; CHECK-LABEL: f1_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB18_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp.inv = and i1 %cmpne3, %cmpne0 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and' with both operands select_ccmask(cc != 2). 
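For the next test, the two select_ccmask operands intersect to a single CC value; evaluating the chain over cc in [0,3]:

;   %ugt1         = icmp samesign ugt i32 %cc, 1  ; cc in {2,3}
;   %cmpne3       = icmp ne i32 %cc, 3            ; cc in {0,1,2}
;   %and.cond.inv = and i1 %ugt1, %cmpne3         ; intersection: cc == 2
; Branching on the inverse calls @dummy exactly when cc != 2, hence "jgnh".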
+define void @f1_2_2(ptr %a) { +; CHECK-LABEL: f1_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB19_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %and.cond.inv = and i1 %ugt1, %cmpne3 + br i1 %and.cond.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and/tm' for (cc == 0|2). +define void @f1_2_3(ptr %a) { +; CHECK-LABEL: f1_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jghe dummy@PLT +; CHECK-NEXT: .LBB20_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and/tm' for (cc == 1|3). +define void @f1_2_4(ptr %a) { +; CHECK-LABEL: f1_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnhe dummy@PLT +; CHECK-NEXT: .LBB21_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define void @f1_2_5(ptr %a) { +; CHECK-LABEL: f1_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB22_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp = xor i1 %cmpne3, %trunc + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check nested 'xor' cc with select_ccmask(cc != 1). 
+define void @f1_3_1(ptr %a) { +; CHECK-LABEL: f1_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB23_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp.inv = xor i1 %cmpne3, %xor + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define void @f1_3_2(ptr %a) { +; CHECK-LABEL: f1_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB24_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cmp.inv = xor i1 %cmpeq3, %trunc + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define void @f1_3_3(ptr %a) { +; CHECK-LABEL: f1_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB25_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cmp.cond.inv = xor i1 %cmpne0, %trunc + br i1 %cmp.cond.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'or' with both operands select_ccmask, one with TM and the other with +; ICMP(cc == 1). +define void @f1_4_1(ptr %a) { +; CHECK-LABEL: f1_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgl dummy@PLT +; CHECK-NEXT: .LBB26_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cmp.cond.inv = or i1 %cmpeq3, %cmpeq0 + br i1 %cmp.cond.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'or' for (cc == 0|1).
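The 'or' range tests work the same way at the top of the unsigned range; a sketch for f1_4_2 and f1_4_3, which follow:

;   %or  = or disjoint i32 %cc, -4        ; cc in [0,3] maps to 0xFFFFFFFC..0xFFFFFFFF
;   %cmp = icmp samesign ult i32 %or, -2  ; true for -4 and -3, i.e. cc in {0,1}
; Both the inverted form (ugt -3) and the direct form (ult -2) collapse to the
; "le" mask, giving the expected "jgle dummy@PLT".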
+define void @f1_4_2(ptr %a) { +; CHECK-LABEL: f1_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgle dummy@PLT +; CHECK-NEXT: .LBB27_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cmp.inv = icmp samesign ugt i32 %or, -3 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'or' for (cc == 0|1). +define void @f1_4_3(ptr %a) { +; CHECK-LABEL: f1_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgle dummy@PLT +; CHECK-NEXT: .LBB28_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cmp = icmp samesign ult i32 %or, -2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll new file mode 100644 index 0000000..b9b9a4b --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll @@ -0,0 +1,1665 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s +; Test implementation of combining select_ccmask for flag output operand and +; optimizing ipm sequence using conditional branches. + +; Test-1(f2_0_*): Both TrueVal and FalseVal non-const(14-valid CCMask). + +; Check (cc == 0). +define i64 @f2_0_eq_0(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 0). +define i64 @f2_0_ne_0(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ugt i32 %cc, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 1). 
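In these select_ccmask tests the combined condition lowers to a conditional return rather than a conditional branch; the shape, taken from the CHECK lines, is:

;   b<mask>r %r14   ; return %x (already in %r2) when the CC mask matches
;   lgr %r2, %r3    ; otherwise fall through: copy %y into %r2
;   br %r14
; e.g. (cc == 1) in f2_0_eq_1 below uses "blr %r14".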
+define i64 @f2_0_eq_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 1). +define i64 @f2_0_ne_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB3_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ne i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 2). +define i64 @f2_0_eq_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB4_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 2). +define i64 @f2_0_ne_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnhr %r14 +; CHECK-NEXT: .LBB5_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ne i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 3). +define i64 @f2_0_eq_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bor %r14 +; CHECK-NEXT: .LBB6_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 3). 
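One subtlety in the (cc != 3) test below: thanks to the llvm.assume(cc < 4), the inequality is expressed as an unsigned bound:

;   %cond = icmp ult i32 %cc, 3   ; with cc in [0,3], equivalent to (cc != 3)
; which lowers to the complement mask, "bnor %r14".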
+define i64 @f2_0_ne_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnor %r14 +; CHECK-NEXT: .LBB7_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ult i32 %cc, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|1). +define i64 @f2_0_01(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB8_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ult i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|2). +define i64 @f2_0_02(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB9_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|3). +define i64 @f2_0_03(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB10_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cmp0 = icmp ne i32 %cc, 0 + %cmp3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmp0, %cmp3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 1|2). +define i64 @f2_0_12(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB11_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 1|3). 
+define i64 @f2_0_13(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB12_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 2|3). +define i64 @f2_0_23(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB13_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ugt i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Test-2(f2_1_*/f2_2_*/f2_3_*/f2_4_*). +; Both TrueVal and FalseVal are non-const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f2_1_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB14_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f2_1_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB15_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f2_1_3(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB16_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). 
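A sketch of how f2_2_1 (next) collapses to a single condition, with the names from its IR:

;   %andcc    = and i32 %cc, 1             ; the 'tm' operand: odd cc, i.e. {1,3}
;   %cmpne0   = icmp ne i32 %andcc, 0
;   %cmpne3   = icmp ne i32 %cc, 3         ; the select_ccmask operand
;   %cond.inv = and i1 %cmpne3, %cmpne0    ; {1,3} minus {3} = (cc == 1)
; The whole chain is (cc == 1), so the select returns %y via "blr %r14".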
+define i64 @f2_2_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB17_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f2_2_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB18_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f2_2_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB19_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f2_2_4(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB20_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f2_2_5(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB21_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). 
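+; The nested xors cancel for cc == 0|2|3 and survive only for cc == 1,
+; so this chain should likewise reduce to a plain CC==1 branch ('blr').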
+define i64 @f2_3_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB22_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f2_3_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB23_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f2_3_3(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB24_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f2_4_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB25_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). 
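+; With cc in [0,3], 'or disjoint %cc, -4' maps cc to [-4,-1]; the
+; inverted 'ugt -3' compare then covers cc == 2|3, the complement of
+; cc == 0|1, hence the expected 'bnler'.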
+define i64 @f2_4_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB26_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f2_4_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB27_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Test-3(f3_1_*/f3_2_*/f3_3_*/f3_4_*). +; TrueVal is non-const and FalseVal is const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f3_1_1(i64 %x, ptr %a) { +; CHECK-LABEL: f3_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB28_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f3_1_2(i64 %x, ptr %a) { +; CHECK-LABEL: f3_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB29_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f3_1_3(ptr %a, i64 %x) { +; CHECK-LABEL: f3_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB30_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). 
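+; Same cc == 1 reduction as in f2_2_1, now returning the constant 5 on
+; the CC==1 path ('blr').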
+define i64 @f3_2_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB31_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f3_2_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB32_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f3_2_3(i64 %x, ptr %a) { +; CHECK-LABEL: f3_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB33_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f3_2_4(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB34_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). 
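+; 'trunc' to i1 reads the low CC bit; xor-ing it with cc != 3 is true
+; everywhere except cc == 1, so the expected branch is on the inverse
+; low mask ('bnlr').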
+define i64 @f3_2_5(i64 %x, ptr %a) { +; CHECK-LABEL: f3_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB35_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f3_3_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB36_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f3_3_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB37_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f3_3_3(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB38_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). 
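+; cc == 1 is the only value that is both odd and not 3; the inverted
+; 'or' covers cc == 0|2|3, so only the CC==1 path should return %x
+; ('bnlr').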
+define i64 @f3_4_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB39_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f3_4_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB40_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f3_4_3(i64 %x, ptr %a) { +; CHECK-LABEL: f3_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB41_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + + +; Test-4(f4_1_*/f4_2_*/f4_3_*/f4_4_*). +; TrueVal is const and FalseVal is non-const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f4_1_1(ptr %a, i64 %y) { +; CHECK-LABEL: f4_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB42_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). 
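+; 'add %cc, -1' wraps cc == 0 around, so the unsigned '< 2' compare
+; holds exactly for cc == 1|2, matching the low-or-high mask ('blhr').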
+define i64 @f4_1_2(ptr %a, i64 %y) { +; CHECK-LABEL: f4_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB43_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f4_1_3(i64 %y, ptr %a) { +; CHECK-LABEL: f4_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB44_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f4_2_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB45_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f4_2_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB46_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f4_2_3(ptr %a, i64 %y) { +; CHECK-LABEL: f4_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB47_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). 
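+; As in f2_0_13, the odd-CC select is inverted, so the branch should
+; stay on the even-CC mask ('bher') and the odd path returns 15.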
+define i64 @f4_2_4(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB48_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f4_2_5(ptr %a, i64 %y) { +; CHECK-LABEL: f4_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB49_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f4_3_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB50_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f4_3_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB51_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). 
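+; Here the low-bit/compare xor isolates cc == 2, so the select should
+; map to the single-condition branch 'bhr'.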
+define i64 @f4_3_3(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB52_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f4_4_1(i64 %y,ptr %a) { +; CHECK-LABEL: f4_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB53_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f4_4_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB54_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f4_4_3(ptr %a, i64 %y) { +; CHECK-LABEL: f4_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB55_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Test-5(f5_1_*/f5_2_*/f5_3_*/f5_4_*). +; Both TrueVal and FalseVal are const with mixed patterns involving +; Binary Ops. + + +; Check 'add' for (cc != 0). 
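+; 'add %cc, -1' wraps cc == 0 to UINT_MAX, so unsigned '< 3' holds for
+; cc == 1|2|3, i.e. cc != 0, giving a 'bner' between the two constants.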
+define i64 @f5_1_1(ptr %a) { +; CHECK-LABEL: f5_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB56_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f5_1_2(ptr %a) { +; CHECK-LABEL: f5_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB57_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f5_1_3(ptr %a) { +; CHECK-LABEL: f5_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB58_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f5_2_1(ptr %a) { +; CHECK-LABEL: f5_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB59_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f5_2_2(ptr %a) { +; CHECK-LABEL: f5_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB60_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). 
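+; Direct even-CC form of the and/tm pattern with both arms constant;
+; 'bher' is expected to pick 15 for cc == 0|2 and fall through to 5.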
+define i64 @f5_2_3(ptr %a) { +; CHECK-LABEL: f5_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB61_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f5_2_4(ptr %a) { +; CHECK-LABEL: f5_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB62_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f5_2_5(ptr %a) { +; CHECK-LABEL: f5_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB63_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f5_3_1(ptr %a) { +; CHECK-LABEL: f5_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB64_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). 
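+; trunc xor (cc == 3) is true only for cc == 1, so this all-constant
+; select should become a CC==1 branch ('blr') between 5 and 15.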
+define i64 @f5_3_2(ptr %a) { +; CHECK-LABEL: f5_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB65_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f5_3_3(ptr %a) { +; CHECK-LABEL: f5_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB66_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f5_4_1(ptr %a) { +; CHECK-LABEL: f5_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB67_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f5_4_2(ptr %a) { +; CHECK-LABEL: f5_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB68_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). 
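+; Direct form of the 'or disjoint' trick: '%or ult -2' holds for
+; cc == 0|1, so the expected branch is on the low-or-equal mask
+; ('bler').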
+define i64 @f5_4_3(ptr %a) { +; CHECK-LABEL: f5_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB69_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Nested select_ccmask with TrueVal and FalseVal swapped with each other. +define i64 @f6_1(ptr %a) { +; CHECK-LABEL: f6_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB70_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %select = select i1 %cmpeq3, i64 5, i64 15 + %res = select i1 %cmpeq0, i64 %select, i64 5 + ret i64 %res +} + diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll index ae170d7..d949068 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -104,6 +104,31 @@ define void @memset_i32(ptr %dest, i8 %val, i32 %len) { ret void } +; CHECK-LABEL: memcpy_0: +; CHECK-NEXT: .functype memcpy_0 (i32, i32) -> () +; CHECK-NEXT: return +define void @memcpy_0(ptr %dest, ptr %src) { + call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0) + ret void +} + +; CHECK-LABEL: memmove_0: +; CHECK-NEXT: .functype memmove_0 (i32, i32) -> () +; CHECK-NEXT: return +define void @memmove_0(ptr %dest, ptr %src) { + call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0) + ret void +} + +; CHECK-LABEL: memset_0: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_0 (i32, i32) -> () +; BULK-MEM-NEXT: return +define void @memset_0(ptr %dest, i8 %val) { + call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 0, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -137,14 +162,8 @@ define void @memset_1(ptr %dest, i8 %val) { ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memcpy_1024(ptr %dest, ptr %src) { call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0) @@ -154,14 +173,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memmove_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; 
BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memmove_1024(ptr %dest, ptr %src) { call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0) @@ -171,14 +184,8 @@ define void @memmove_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memset_1024: ; NO-BULK-MEM-NOT: memory.fill ; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memset_1024(ptr %dest, i8 %val) { call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1024, i1 0) @@ -201,17 +208,11 @@ define void @memset_1024(ptr %dest, i8 %val) { ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_src(ptr %dst) { %a = alloca [100 x i8] @@ -224,17 +225,11 @@ define void @memcpy_alloca_src(ptr %dst) { ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_dst(ptr %src) { %a = alloca 
[100 x i8] @@ -247,17 +242,11 @@ define void @memcpy_alloca_dst(ptr %src) { ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $1=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L3:[0-9]+]]=, $pop[[L2]] -; BULK-MEM-NEXT: br_if 0, $pop[[L3]] -; BULK-MEM-NEXT: i32.const $push[[L4:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L6:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memset_alloca(i8 %val) { %a = alloca [100 x i8] diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll index 0cf8493..d0206a3 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll @@ -110,6 +110,31 @@ define void @memset_i32(ptr %dest, i8 %val, i64 %len) { ret void } +; CHECK-LABEL: memcpy_0: +; CHECK-NEXT: .functype memcpy_0 (i64, i64) -> () +; CHECK-NEXT: return +define void @memcpy_0(ptr %dest, ptr %src) { + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0) + ret void +} + +; CHECK-LABEL: memmove_0: +; CHECK-NEXT: .functype memmove_0 (i64, i64) -> () +; CHECK-NEXT: return +define void @memmove_0(ptr %dest, ptr %src) { + call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0) + ret void +} + +; CHECK-LABEL: memset_0: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_0 (i64, i32) -> () +; BULK-MEM-NEXT: return +define void @memset_0(ptr %dest, i8 %val) { + call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 0, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -143,14 +168,8 @@ define void @memset_1(ptr %dest, i8 %val) { ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memcpy_1024(ptr %dest, ptr %src) { call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0) @@ -160,14 +179,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memmove_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memmove_1024(ptr %dest, ptr 
%src) { call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0) @@ -177,14 +190,8 @@ define void @memmove_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memset_1024: ; NO-BULK-MEM-NOT: memory.fill ; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memset_1024(ptr %dest, i8 %val) { call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1024, i1 0) @@ -207,17 +214,11 @@ define void @memset_1024(ptr %dest, i8 %val) { ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_src(ptr %dst) { %a = alloca [100 x i8] @@ -230,17 +231,11 @@ define void @memcpy_alloca_src(ptr %dst) { ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_dst(ptr %src) { %a = alloca [100 x i8] @@ -253,17 +248,11 @@ define void @memcpy_alloca_dst(ptr %src) { ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $1=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L2:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L3:[0-9]+]]=, $pop[[L2]] -; BULK-MEM-NEXT: br_if 0, $pop[[L3]] -; BULK-MEM-NEXT: i64.const 
$push[[L4:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L6:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memset_alloca(i8 %val) { %a = alloca [100 x i8] diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll index 28b4541..7bdc4e1 100644 --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: callq __ubyte_convert_to_ctype ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_6 +; CHECK-NEXT: js LBB0_4 ; CHECK-NEXT: ## %bb.1: ## %cond_next.i ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: movq %rbx, %rdi @@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: cmpl $-2, %ecx -; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: je LBB0_8 ; CHECK-NEXT: ## %bb.2: ## %cond_next.i ; CHECK-NEXT: cmpl $-1, %ecx -; CHECK-NEXT: jne LBB0_3 -; CHECK-NEXT: LBB0_8: ## %bb4 +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: LBB0_3: ## %bb4 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 16(%rax), %rax -; CHECK-NEXT: jmp LBB0_9 -; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: jmp LBB0_10 +; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-2, %eax -; CHECK-NEXT: je LBB0_10 -; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit -; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: je LBB0_8 -; CHECK-NEXT: LBB0_3: ## %bb35 +; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: LBB0_6: ## %bb35 ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14 ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *216(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.12: ## %cond_false.i -; CHECK-NEXT: setne %dil +; CHECK-NEXT: je LBB0_11 +; CHECK-NEXT: ## %bb.7: ## %cond_false.i ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movl %eax, %r15d ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %dil, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: LBB0_13: ## %cond_true.i200 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_15 -; CHECK-NEXT: ## %bb.14: ## %cond_true14.i -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: jmp LBB0_16 -; CHECK-NEXT: LBB0_10: ## %bb17 +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: jmp LBB0_14 +; CHECK-NEXT: LBB0_8: ## %bb17 ; CHECK-NEXT: callq _PyErr_Occurred ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.11: ## %cond_next +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.9: ## %cond_next ; CHECK-NEXT: 
movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 80(%rax), %rax -; CHECK-NEXT: LBB0_9: ## %bb4 +; CHECK-NEXT: LBB0_10: ## %bb4 ; CHECK-NEXT: movq 96(%rax), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq *40(%rax) -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_4: ## %cond_true.i +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_11: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: testb %sil, %sil -; CHECK-NEXT: sete %al +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: LBB0_12: ## %cond_false.i ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: sete %cl -; CHECK-NEXT: xorl %r15d, %r15d -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: jne LBB0_13 -; CHECK-NEXT: LBB0_5: ## %cond_next17.i +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.13: ## %cond_next17.i ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movzbl %ah, %ebx -; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: jmp LBB0_18 +; CHECK-NEXT: LBB0_14: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_17 +; CHECK-NEXT: ## %bb.16: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *224(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_19 -; CHECK-NEXT: ## %bb.17: ## %cond_true61 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.19: ## %cond_true61 ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi @@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: callq *200(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_23 -; CHECK-NEXT: ## %bb.18: ## %cond_next73 +; CHECK-NEXT: js LBB0_27 +; CHECK-NEXT: ## %bb.20: ## %cond_next73 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -149,13 +146,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: movl %ebp, %edx ; CHECK-NEXT: callq *232(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: LBB0_19: ## %cond_next89 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: LBB0_21: ## %cond_next89 ; CHECK-NEXT: movl $2, %edi ; CHECK-NEXT: callq _PyTuple_New ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_23 -; CHECK-NEXT: ## %bb.20: ## %cond_next97 +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.22: ## %cond_next97 ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12 ; CHECK-NEXT: movq (%r12), %rax @@ -163,8 +160,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.25: ## %cond_next135 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.23: ## %cond_next135 ; CHECK-NEXT: movb %r15b, 16(%rax) ; CHECK-NEXT: movq %rax, 24(%r14) ; CHECK-NEXT: movq (%r12), %rax @@ -172,22 +169,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.26: ## %cond_next182 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## 
%bb.24: ## %cond_next182 ; CHECK-NEXT: movb %bl, 16(%rax) ; CHECK-NEXT: movq %rax, 32(%r14) ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_21: ## %cond_true113 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_25: ## %cond_true113 ; CHECK-NEXT: decq (%r14) -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.22: ## %cond_true126 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.26: ## %cond_true126 ; CHECK-NEXT: movq 8(%r14), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq *48(%rax) -; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock ; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll index df04b67..c2b7068 100644 --- a/llvm/test/CodeGen/X86/isel-fpclass.ll +++ b/llvm/test/CodeGen/X86/isel-fpclass.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86,X86-SDAGISEL ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL ; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL ; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL -; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X64,X64-GISEL +; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X86,X86-GISEL +; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X64-GISEL define i1 @isnone_f(float %x) nounwind { ; X86-LABEL: isnone_f: @@ -23,6 +23,11 @@ define i1 @isnone_f(float %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: xorl %eax, %eax ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isnone_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) ret i1 %0 @@ -45,22 +50,27 @@ define i1 @isany_f(float %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: movb $1, %al ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isany_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movb $1, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) ret i1 %0 } define i1 @issignaling_f(float %x) nounwind { -; X86-LABEL: issignaling_f: -; X86: # %bb.0: -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setl %cl -; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; X86-NEXT: setge %al -; X86-NEXT: andb %cl, %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: issignaling_f: +; X86-SDAGISEL: # %bb.0: +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %cl +; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 
+; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: andb %cl, %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: issignaling_f: ; X64: # %bb.0: @@ -87,18 +97,44 @@ define i1 @issignaling_f(float %x) nounwind { ; X86-FASTISEL-NEXT: andb %cl, %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: issignaling_f: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %dl +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: andb %dl, %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: issignaling_f: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %dl +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: andb %dl, %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" ret i1 %a0 } define i1 @isquiet_f(float %x) nounwind { -; X86-LABEL: isquiet_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setge %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isquiet_f: ; X64: # %bb.0: # %entry @@ -119,19 +155,39 @@ define i1 @issignaling_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setge %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isquiet_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setae %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isquiet_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setae %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" ret i1 %0 } define i1 @not_isquiet_f(float %x) nounwind { -; X86-LABEL: not_isquiet_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setl %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isquiet_f: ; X64: # %bb.0: # %entry @@ 
-152,19 +208,57 @@ define i1 @not_isquiet_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setl %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isquiet_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %cl +; X86-GISEL-NEXT: orb %dl, %cl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %dl +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: andb %dl, %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isquiet_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %cl +; X64-GISEL-NEXT: orb %dl, %cl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %dl +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: andb %dl, %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" ret i1 %0 } define i1 @isinf_f(float %x) nounwind { -; X86-LABEL: isinf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isinf_f: ; X64: # %bb.0: # %entry @@ -185,19 +279,39 @@ define i1 @isinf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isinf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isinf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" ret i1 %0 } define i1 @not_isinf_f(float %x) nounwind { -; X86-LABEL: not_isinf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: 
not_isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isinf_f: ; X64: # %bb.0: # %entry @@ -218,17 +332,43 @@ define i1 @not_isinf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setne %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isinf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %dl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isinf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" ret i1 %0 } define i1 @is_plus_inf_f(float %x) nounwind { -; X86-LABEL: is_plus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_plus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_plus_inf_f: ; X64: # %bb.0: # %entry @@ -246,17 +386,34 @@ define i1 @is_plus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_plus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_plus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" ret i1 %0 } define i1 @is_minus_inf_f(float %x) nounwind { -; X86-LABEL: is_minus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_minus_inf_f: ; X64: # %bb.0: # %entry @@ -274,17 +431,34 @@ define i1 @is_minus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_minus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl 
$-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_minus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" ret i1 %0 } define i1 @not_is_minus_inf_f(float %x) nounwind { -; X86-LABEL: not_is_minus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_is_minus_inf_f: ; X64: # %bb.0: # %entry @@ -302,19 +476,55 @@ define i1 @not_is_minus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setne %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_is_minus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: pushl %ebx +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: movl %eax, %ecx +; X86-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %edx, %edx +; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %bl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %ah +; X86-GISEL-NEXT: orb %dl, %ah +; X86-GISEL-NEXT: orb %bl, %ah +; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %ah, %al +; X86-GISEL-NEXT: popl %ebx +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_is_minus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: movl %eax, %ecx +; X64-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %edx, %edx +; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %sil +; X64-GISEL-NEXT: orb %dl, %sil +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %dl +; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: orb %sil, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" ret i1 %0 } define i1 @isfinite_f(float %x) nounwind { -; X86-LABEL: isfinite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setl %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isfinite_f: ; X64: # %bb.0: # %entry @@ -335,19 +545,39 @@ define i1 @isfinite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setl %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isfinite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; 
X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isfinite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" ret i1 %0 } define i1 @not_isfinite_f(float %x) nounwind { -; X86-LABEL: not_isfinite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setge %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isfinite_f: ; X64: # %bb.0: # %entry @@ -368,17 +598,43 @@ define i1 @not_isfinite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setge %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isfinite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %dl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isfinite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" ret i1 %0 } define i1 @is_plus_finite_f(float %x) nounwind { -; X86-LABEL: is_plus_finite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; X86-NEXT: setb %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_plus_finite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setb %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_plus_finite_f: ; X64: # %bb.0: # %entry @@ -396,6 +652,23 @@ define i1 @is_plus_finite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setb %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_plus_finite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_plus_finite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl 
$2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" ret i1 %0 @@ -418,6 +691,11 @@ define i1 @isnone_d(double %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: xorl %eax, %eax ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isnone_d: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0) ret i1 %0 @@ -440,6 +718,11 @@ define i1 @isany_d(double %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: movb $1, %al ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isany_d: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movb $1, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023) ret i1 %0 diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 4cde581..caec02e 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -4765,6 +4765,66 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32 } declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>) +define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023_sext: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1} +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-NEXT: retq +; +; X86-LABEL: pr163023_sext: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kxnorw %k0, %k0, %k1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl + %addr.p = ptrtoint ptr %a0 to i64 + %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 + %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer + %ofs = sext <16 x i32> %a1 to <16 x i64> + %addr = add nuw <16 x i64> %addr.splat, %ofs + %ptr = inttoptr <16 x i64> %addr to <16 x ptr> + %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + ret <16 x i32> %gather +} + +define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023_zext: +; X64: # %bb.0: +; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-NEXT: kxnorw %k0, %k0, %k1 +; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-NEXT: kxnorw %k0, %k0, %k2 +; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2} +; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1} +; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0 +; X64-NEXT: retq +; +; X86-LABEL: pr163023_zext: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kxnorw %k0, %k0, %k1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl + %addr.p = ptrtoint ptr %a0 to i64 + %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 + %addr.splat = 
shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer + %ofs = zext <16 x i32> %a1 to <16 x i64> + %addr = add nuw <16 x i64> %addr.splat, %ofs + %ptr = inttoptr <16 x i64> %addr to <16 x ptr> + %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + ret <16 x i32> %gather +} + ; ; PR45906 ; This used to cause fast-isel to generate bad copy instructions that would diff --git a/llvm/test/CodeGen/X86/pr160612.ll b/llvm/test/CodeGen/X86/pr160612.ll new file mode 100644 index 0000000..6572c42 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr160612.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s + +; Test for issue #160612: OR conditions in branches should use multiple branches +; instead of materializing booleans with SETCC when no special optimizations apply. + +declare void @subroutine_foo() +declare void @subroutine_bar() + +; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH +; instead of using two conditional branches directly. +define void @func_a(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + %cmp1 = icmp eq i32 %y, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end + +if.else: + tail call void @subroutine_bar() + br label %if.end + +if.end: + ret void +} + +; Reference implementation that already generated optimal code. +; This should continue to generate the same optimal code. 
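+;
+; As a rough illustration only (hypothetical C source reconstructed from the
+; IR below, not taken verbatim from issue #160612), the two functions
+; correspond to:
+;
+;   void func_a(int x, int y) {   /* one branch on the OR'd condition */
+;     if (x == 0 || y == 0) subroutine_foo(); else subroutine_bar();
+;   }
+;
+;   void func_b(int x, int y) {   /* same logic, already split into two ifs */
+;     if (x == 0) subroutine_foo();
+;     else if (y == 0) subroutine_foo();
+;     else subroutine_bar();
+;   }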
+define void @func_b(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.else3 +; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end4 + +if.else: + %cmp1 = icmp eq i32 %y, 0 + br i1 %cmp1, label %if.then2, label %if.else3 + +if.then2: + tail call void @subroutine_foo() + br label %if.end4 + +if.else3: + tail call void @subroutine_bar() + br label %if.end4 + +if.end4: + ret void +} diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 69abf6e..d018c53 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1493,15 +1493,23 @@ define i1 @allbits_i128_load_arg(ptr %w) { } define i1 @anybits_i256_load_arg(ptr %w) { -; ANY-LABEL: anybits_i256_load_arg: -; ANY: # %bb.0: -; ANY-NEXT: movq (%rdi), %rax -; ANY-NEXT: movq 8(%rdi), %rcx -; ANY-NEXT: orq 24(%rdi), %rcx -; ANY-NEXT: orq 16(%rdi), %rax -; ANY-NEXT: orq %rcx, %rax -; ANY-NEXT: setne %al -; ANY-NEXT: retq +; SSE-LABEL: anybits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: orq 24(%rdi), %rcx +; SSE-NEXT: orq 16(%rdi), %rax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVXANY-LABEL: anybits_i256_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqu (%rdi), %ymm0 +; AVXANY-NEXT: vptest %ymm0, %ymm0 +; AVXANY-NEXT: setne %al +; AVXANY-NEXT: vzeroupper +; AVXANY-NEXT: retq %ld = load i256, ptr %w %cmp = icmp ne i256 %ld, 0 ret i1 %cmp @@ -1552,21 +1560,30 @@ define i1 @allbits_i256_load_arg(ptr %w) { } define i1 @anybits_i512_load_arg(ptr %w) { -; ANY-LABEL: anybits_i512_load_arg: -; ANY: # %bb.0: -; ANY-NEXT: movq 16(%rdi), %rax -; ANY-NEXT: movq (%rdi), %rcx -; ANY-NEXT: movq 8(%rdi), %rdx -; ANY-NEXT: movq 24(%rdi), %rsi -; ANY-NEXT: orq 56(%rdi), %rsi -; ANY-NEXT: orq 40(%rdi), %rdx -; ANY-NEXT: orq %rsi, %rdx -; ANY-NEXT: orq 48(%rdi), %rax -; ANY-NEXT: orq 32(%rdi), %rcx -; ANY-NEXT: orq %rax, %rcx -; ANY-NEXT: orq %rdx, %rcx -; ANY-NEXT: setne %al -; ANY-NEXT: retq +; NO512-LABEL: anybits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: orq 56(%rdi), %rsi +; NO512-NEXT: orq 40(%rdi), %rdx +; NO512-NEXT: orq %rsi, %rdx +; NO512-NEXT: orq 48(%rdi), %rax +; NO512-NEXT: orq 32(%rdi), %rcx +; NO512-NEXT: orq %rax, %rcx +; NO512-NEXT: orq %rdx, %rcx +; NO512-NEXT: setne %al +; NO512-NEXT: retq +; +; AVX512-LABEL: anybits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: setne %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %ld = load i512, ptr %w %cmp = icmp ne i512 %ld, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 42617c1..18588aa 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr @@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index b2064b1..02d4d88 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -181,40 +181,38 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; CHECK-LABEL: segmentedStack: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %gs:816, %rsp -; CHECK-NEXT: jbe LBB3_6 +; CHECK-NEXT: jbe LBB3_7 ; CHECK-NEXT: LBB3_1: ## %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: sete %al -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: sete %al -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB3_4 -; CHECK-NEXT: ## %bb.2: ## %if.end4.i +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.2: ## %entry +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.3: ## %if.end4.i ; CHECK-NEXT: movq 8(%rdi), %rdx ; CHECK-NEXT: cmpq 8(%rsi), %rdx -; CHECK-NEXT: jne LBB3_5 -; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i +; CHECK-NEXT: jne LBB3_6 +; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i ; CHECK-NEXT: movq (%rsi), %rsi ; CHECK-NEXT: movq (%rdi), %rdi ; CHECK-NEXT: callq _memcmp ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: sete %al -; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit +; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_5: +; CHECK-NEXT: LBB3_6: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_6: +; CHECK-NEXT: LBB3_7: ; CHECK-NEXT: movl $8, %r10d ; CHECK-NEXT: movl $0, %r11d ; CHECK-NEXT: callq ___morestack @@ -224,43 +222,41 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; NOCOMPACTUNWIND-LABEL: segmentedStack: ; NOCOMPACTUNWIND: # %bb.0: ; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp -; 
NOCOMPACTUNWIND-NEXT: jbe .LBB3_6 +; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7 ; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry ; NOCOMPACTUNWIND-NEXT: pushq %rax ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 -; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi -; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi -; NOCOMPACTUNWIND-NEXT: sete %cl -; NOCOMPACTUNWIND-NEXT: orb %al, %cl ; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax ; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testb %cl, %cl -; NOCOMPACTUNWIND-NEXT: jne .LBB3_4 -; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i +; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry +; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i ; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx ; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx -; NOCOMPACTUNWIND-NEXT: jne .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i +; NOCOMPACTUNWIND-NEXT: jne .LBB3_6 +; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i ; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi ; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi ; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT ; NOCOMPACTUNWIND-NEXT: testl %eax, %eax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit +; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_5: +; NOCOMPACTUNWIND-NEXT: .LBB3_6: ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 ; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_6: +; NOCOMPACTUNWIND-NEXT: .LBB3_7: ; NOCOMPACTUNWIND-NEXT: movl $8, %r10d ; NOCOMPACTUNWIND-NEXT: movl $0, %r11d ; NOCOMPACTUNWIND-NEXT: callq __morestack diff --git a/llvm/test/MC/AArch64/data-directive-specifier.s b/llvm/test/MC/AArch64/data-directive-specifier.s index 2cb7eb3..2d1ec4f 100644 --- a/llvm/test/MC/AArch64/data-directive-specifier.s +++ b/llvm/test/MC/AArch64/data-directive-specifier.s @@ -12,6 +12,7 @@ l: # CHECK-NEXT: 0x8 R_AARCH64_PLT32 extern 0x4 # CHECK-NEXT: 0xC R_AARCH64_PLT32 g 0x8 # CHECK-NEXT: 0x10 R_AARCH64_PLT32 g 0x18 +# CHECK-NEXT: 0x14 R_AARCH64_FUNCINIT64 .text 0x0 # CHECK-NEXT: } .data .word l@plt - . @@ -21,6 +22,8 @@ l: .word g@plt - . 
+ 8 .word g@plt - .data + 8 +.quad l@funcinit + # CHECK: Section ({{.*}}) .rela.data1 { # CHECK-NEXT: 0x0 R_AARCH64_GOTPCREL32 data1 0x0 # CHECK-NEXT: 0x4 R_AARCH64_GOTPCREL32 extern 0x4 diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 3eda077..475faf9 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -177,6 +177,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: static constexpr bool is_iterable = true; // CHECK-NEXT: }; // CHECK-NEXT: } // namespace llvm +// CHECK-EMPTY: // CHECK-NEXT: #endif // LLVM_Tdl_INC diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index a25197c..ccc0944 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -150,6 +150,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: static constexpr bool is_iterable = true; // CHECK-NEXT: }; // CHECK-NEXT: } // namespace llvm +// CHECK-EMPTY: // CHECK-NEXT: #endif // LLVM_Tdl_INC // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll index fdab67a..afc98ce 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg -verify-dom-info=1 %s | FileCheck %s ; This test checks that a constant propagation is applied for a basic loop. ; Related to bug 44679. diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index f45798b..5076517 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s -; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE ; This test checks that the analysis identifies all threadable paths in a ; simple CFG. A threadable path includes a list of basic blocks, the exit diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll index 092c854..426b51e 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s ; These tests check that the DFA jump threading transformation is applied ; properly to two CFGs. 
It checks that blocks are cloned, branches are updated, @@ -445,9 +445,67 @@ bb2: ; preds = %select.unfold unreachable } + +define i16 @DTU_update_crash() { +; CHECK-LABEL: @DTU_update_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY_SELECTBLOCK:%.*]] +; CHECK: for.body.selectblock: +; CHECK-NEXT: br i1 false, label [[SWITCHBLOCK_JT0:%.*]], label [[SEL_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: sel.si.unfold.false: +; CHECK-NEXT: br label [[SWITCHBLOCK:%.*]] +; CHECK: sel.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ] +; CHECK-NEXT: br label [[SWITCHBLOCK_JT0]] +; CHECK: switchblock: +; CHECK-NEXT: [[SWITCHBLOCK_PHI:%.*]] = phi i32 [ poison, [[SEL_SI_UNFOLD_FALSE:%.*]] ] +; CHECK-NEXT: [[P_24_ADDR_3:%.*]] = phi i32 [ 0, [[SEL_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: switch i32 [[SWITCHBLOCK_PHI]], label [[CLEANUP:%.*]] [ +; CHECK-NEXT: i32 0, label [[FOR_INC:%.*]] +; CHECK-NEXT: i32 1, label [[CLEANUP]] +; CHECK-NEXT: i32 5, label [[FOR_BODY_SELECTBLOCK]] +; CHECK-NEXT: ] +; CHECK: switchblock.jt0: +; CHECK-NEXT: [[SWITCHBLOCK_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SEL_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[P_24_ADDR_3_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ 0, [[SEL_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 false, label [[FOR_BODY_SELECTBLOCK]], label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[P_24_ADDR_3_JT0]]) +; CHECK-NEXT: ret i16 0 +; +entry: + br label %for.body.selectblock + +for.body.selectblock: ; preds = %for.inc, %switchblock, %entry + %sel = select i1 false, i32 0, i32 0 + br label %switchblock + +switchblock: ; preds = %for.body.selectblock + %switchblock.phi = phi i32 [ %sel, %for.body.selectblock ] + %p_24.addr.3 = phi i32 [ 0, %for.body.selectblock ] + switch i32 %switchblock.phi, label %cleanup [ + i32 0, label %for.inc + i32 1, label %cleanup + i32 5, label %for.body.selectblock + ] + +for.inc: ; preds = %switchblock + br i1 false, label %for.body.selectblock, label %cleanup + +cleanup: ; preds = %for.inc, %switchblock, %switchblock + call void (...) @llvm.fake.use(i32 %p_24.addr.3) + ret i16 0 +} + +declare void @llvm.fake.use(...) + !0 = !{!"function_entry_count", i32 10} !1 = !{!"branch_weights", i32 3, i32 5} ;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;. ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} ;. diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll index de38752..95d3ffa 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false -verify-dom-info=1 %s | FileCheck %s ; These tests check if selects are unfolded properly for jump threading ; opportunities. 
There are three different patterns to consider: diff --git a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll index 4555dfb..71a469d 100644 --- a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll +++ b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s declare void @do_something() declare void @user(i32) diff --git a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll index 00500a7..cc117e7 100644 --- a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll +++ b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s define void @pr60254() { ; CHECK-LABEL: define void @pr60254() { diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll index 89b132e..9371fe2 100644 --- a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll +++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll @@ -18,7 +18,7 @@ define i64 @test_ptr_compare_guard(ptr %start, ptr %end) { ; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[EXIT_LOOPEXIT:.*]] ; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 -; CHECK-NEXT: [[I64_IV_NEXT]] = add i64 [[I64_IV]], 1 +; CHECK-NEXT: [[I64_IV_NEXT]] = add nuw i64 [[I64_IV]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT_LOOPEXIT]]: diff --git a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll index ba34930..bc988a9 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i1 @a_true_implies_b_true(i8 %z, i1 %X, i1 %Y) { @@ -34,15 +34,15 @@ define <2 x i1> @a_true_implies_b_true_vec(i8 %z0, <2 x i1> %X, <2 x i1> %Y) { ret <2 x i1> %res } -define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) { +define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) !prof !0 { ; CHECK-LABEL: @a_true_implies_b_true2( ; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 [[Z:%.*]], 20 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i1 [[RES]] ; %a = icmp ugt i8 %z, 20 %b = icmp ugt i8 %z, 10 - %sel = select i1 %b, i1 %X, i1 %Y + %sel = select i1 %b, i1 %X, i1 %Y, !prof !1 %res = and i1 %a, %sel ret i1 %res } @@ -258,3 +258,10 @@ define i1 @neg_icmp_eq_implies_trunc(i8 %x, i1 %c) { %sel2 = select i1 %cmp, i1 true, i1 %sel1 ret i1 
%sel2 } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +;. diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll index 5e7c636..a3483af 100644 --- a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -158,6 +158,26 @@ define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) { ret ptr addrspace(1) %r } +define ptr @ptrmask_simplify_ptrtoaddr(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_ptrtoaddr +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %m = ptrtoaddr ptr %p to i64 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32 +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %m = ptrtoaddr ptr addrspace(1) %p to i32 + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m) + ret ptr addrspace(1) %r +} + define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused ; CHECK-SAME: (ptr align 64 [[P:%.*]]) { diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll index fa55eab..bc63f79 100644 --- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -465,8 +465,7 @@ define void @peel_int_eq_condition(i32 %start) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[IV]], [[START]] -; CHECK-NEXT: br i1 [[C_0]], label [[IF_THEN:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[LOOP_LATCH]] ; CHECK: if.then: ; CHECK-NEXT: call void @fn(i32 [[IV]]) ; CHECK-NEXT: br label [[LOOP_LATCH]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll new file mode 100644 index 0000000..e4322cf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll @@ -0,0 +1,588 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s + +define i32 @umax_phi_used_outside(ptr %src, i32 %n) { +; CHECK-LABEL: define i32 @umax_phi_used_outside( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = tail call i32 @llvm.umax.i32(i32 [[MAX]], i32 [[L_EXT]]) +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], 
label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_LCSSA:%.*]] = phi i32 [ [[MAX]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %spec.select, %loop ] + %gep.src = getelementptr inbounds i8, ptr %src, i32 %iv + %l = load i8, ptr %gep.src + %l.ext = zext i8 %l to i32 + %spec.select = tail call i32 @llvm.umax.i32(i32 %max, i32 %l.ext) + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max +} + +define i32 @chained_smax(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_smax( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK: [[PRED_LOAD_IF3]]: +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2 +; CHECK-NEXT: br 
label %[[PRED_LOAD_CONTINUE4]] +; CHECK: [[PRED_LOAD_CONTINUE4]]: +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_IF5]]: +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_CONTINUE6]]: +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[TMP28]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max) + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %l, i32 %max.1) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + +define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_invariant_store_user( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: store i32 
[[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 
[[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 %max.next, ptr %dst, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_same_addr2(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr2( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 0, ptr [[DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 0, ptr %dst, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr3( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: 
[[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 0, ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 0, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 %max.next, ptr %dst, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_different_addr(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_different_addr( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[DST_2:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 
[[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST_2]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 %max.next, ptr %dst.2, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define i32 @chained_instructions_feeding_max1(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_instructions_feeding_max1( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MAX]], [[L]] +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 [[L]]) +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %add = add i32 %max, %l + %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 %l) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + +define i32 @chained_instructions_feeding_max2(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_instructions_feeding_max2( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[MAX_1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[MAX]]) +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[L]], [[MAX_1]] +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 100) +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ 
%max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max) + %l = load i32, ptr %gep.src, align 4 + %add = add i32 %l, %max.1 + %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 100) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + + +define i32 @test_predicated_smin(ptr %src) { +; CHECK-LABEL: define i32 @test_predicated_smin( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP3:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP3]]) +; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[PREDPHI]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[TMP6]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %min = phi i32 [ 0, %entry ], [ %min.merge, %loop.latch ] + %gep.src = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 4 + %c = fcmp une float %l, 0.0 + br i1 %c, label %then, label %loop.latch + +then: + %div = fdiv float %l, 3.0 + %div.i32 = fptosi float %div to i32 + %min.next = tail call i32 @llvm.smin.i32(i32 %min, i32 %div.i32) + br label %loop.latch + +loop.latch: + %min.merge = phi i32 [ %min.next, %then ], [ %min, %loop.header ] + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 111 + br i1 %ec, label %exit, label %loop.header + +exit: + ret i32 %min.merge +} + +define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) { +; CHECK-LABEL: define i32 @smax_reduction_multiple_incoming( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_HEADER_PREHEADER:.*]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: [[IV_PH:%.*]] = phi i32 [ 10, %[[ELSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[MAX_PH:%.*]] = phi i32 [ 5, %[[ELSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; 
CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]] +; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[IV_PH]], [[N_VEC]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MAX_PH]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IV_PH]], [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[MAX_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] +; +entry: + br i1 %cond, label %loop.header, label %else + +else: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ 10, %else ], [ %iv.next, %loop.header ] + %max = phi i32 [ 0, %entry ], [ 5, %else ], [ %max.next, %loop.header ] + %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret i32 %max.next +} diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll 
b/llvm/test/Transforms/SROA/phi-and-select.ll index 616617b..5d5a610 100644 --- a/llvm/test/Transforms/SROA/phi-and-select.ll +++ b/llvm/test/Transforms/SROA/phi-and-select.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -36,11 +36,11 @@ exit: ret i32 %result } -define i32 @test2() { +define i32 @test2() !prof !0 { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1 -; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0 +; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i32 [[RESULT_SROA_SPECULATED]] ; entry: @@ -53,7 +53,7 @@ entry: %v1 = load i32, ptr %a1 %cond = icmp sle i32 %v0, %v1 - %select = select i1 %cond, ptr %a1, ptr %a + %select = select i1 %cond, ptr %a1, ptr %a, !prof !1 %result = load i32, ptr %select ret i32 %result @@ -870,3 +870,17 @@ define i8 @volatile_select(ptr %p, i1 %b) { %v2 = load i8, ptr %px ret i8 %v2 } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5} +;. +; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address } +;. +; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address } +;. +; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +;. +; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-MODIFY-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +;. diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll index 776624c..45c3bbd 100644 --- a/llvm/test/Transforms/SROA/phi-gep.ll +++ b/llvm/test/Transforms/SROA/phi-gep.ll @@ -1,9 +1,12 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG %pair = type { i32, i32 } +;. +; CHECK: @g = global %pair zeroinitializer, align 4 +;. 
define i32 @test_sroa_phi_gep(i1 %cond) { ; CHECK-LABEL: @test_sroa_phi_gep( ; CHECK-NEXT: entry: @@ -334,18 +337,18 @@ exit: unreachable } -define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) { +define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) !prof !0 { ; CHECK-LABEL: @test_sroa_gep_phi_select_same_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[PAIR:%.*]], align 8 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]] +; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i64 1 ; CHECK-NEXT: [[PTR_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PTR]], i64 1 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]] -; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]] +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]], !prof [[PROF2:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -355,9 +358,9 @@ entry: while.body: %phi = phi ptr [ %alloca, %entry ], [ %select, %while.body ] - %select = select i1 %c1, ptr %phi, ptr %ptr + %select = select i1 %c1, ptr %phi, ptr %ptr, !prof !1 %gep = getelementptr inbounds %pair, ptr %select, i64 1 - br i1 %c2, label %exit, label %while.body + br i1 %c2, label %exit, label %while.body, !prof !2 exit: ret void @@ -747,6 +750,18 @@ declare ptr @foo() declare i32 @__gxx_personality_v0(...) declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5} +!2 = !{!"branch_weights", i32 7, i32 11} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 7, i32 11} +;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} ; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll index b48b0f7..a701d78 100644 --- a/llvm/test/Transforms/SROA/select-gep.ll +++ b/llvm/test/Transforms/SROA/select-gep.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG @@ -203,10 +203,10 @@ define i32 @test_select_idx_mem2reg(i1 %c) { ; Test gep with a select-like zext index unfolding on an alloca that is ; splittable and promotable. 
-define i64 @test_select_like_zext_idx_mem2reg(i1 %c) { +define i64 @test_select_like_zext_idx_mem2reg(i1 %c) !prof !0 { ; CHECK-LABEL: @test_select_like_zext_idx_mem2reg( ; CHECK-NEXT: [[IDX:%.*]] = zext i1 [[C:%.*]] to i64 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i64 [[RES]] ; %alloca = alloca [2 x i64], align 8 @@ -352,3 +352,16 @@ define i32 @test_select_idx_not_constant3(i1 %c, ptr %p, i64 %arg) { %res = load i32, ptr %gep, align 4 ret i32 %res } + +!0 = !{!"function_entry_count", i32 10} +;. +; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +;. +; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +;. +; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"unknown", !"sroa"} +;. +; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK-MODIFY-CFG: [[PROF1]] = !{!"unknown", !"sroa"} +;. diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll index eabb697..3b77e49 100644 --- a/llvm/test/Transforms/SROA/slice-width.ll +++ b/llvm/test/Transforms/SROA/slice-width.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind ; This tests that allocas are not split into slices that are not byte width multiple +;. +; CHECK: @foo_copy_source = external constant %union.Foo +; CHECK: @i64_sink = global i64 0 +;. define void @no_split_on_non_byte_width(i32) { ; CHECK-LABEL: @no_split_on_non_byte_width( ; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8 @@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr) ; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte ; vector store, hence accidentally putting gibberish onto the stack. 
-define i32 @memcpy_vec3float_widening(ptr %x) { +define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 { ; CHECK-LABEL: @memcpy_vec3float_widening( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> -; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef +; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4 @@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() { %L2 = load i1, ptr %A ret i1 %L2 } +!0 = !{!"function_entry_count", i32 10} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"unknown", !"sroa"} +;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} ; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml new file mode 100644 index 0000000..e748eecf --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml @@ -0,0 +1,27 @@ +# RUN: yaml2obj %s -o %t.dxbc +# RUN: not llvm-objcopy --dump-section=FKE0=%t.fek0 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-ZEROSIZE -DFILE=%t.fek0 +# RUN: not llvm-objcopy --dump-section=FKE3=%t.fek1 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING -DFILE=%t.fek1 +# RUN: not llvm-objcopy --dump-section=FKE2=%t/does_not_exist/.fek2 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-BAD-PATH -DFILE=%t/does_not_exist/.fek2 -DMSG=%errc_ENOENT + +# CHECK-ZEROSIZE: error: '[[FILE]]': part 'FKE0' is empty +# CHECK-MISSING: error: '[[FILE]]': part 'FKE3' not found +# CHECK-BAD-PATH: error: '[[FILE]]': [[MSG]] + +--- !dxcontainer +Header: + Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + Version: + Major: 1 + Minor: 0 + FileSize: 108 + PartCount: 3 + PartOffsets: [ 60, 68, 76 ] +Parts: + - Name: FKE0 + Size: 0 + - Name: FKE1 + Size: 0 + - Name: FKE2 + Size: 8 +... diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml new file mode 100644 index 0000000..7d80a2c --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml @@ -0,0 +1,278 @@ +# RUN: yaml2obj %s -o %t.dxbc +# RUN: llvm-objcopy --dump-section=DXIL=%t.bc %t.dxbc +# RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=BITCODE +# RUN: wc -c %t.bc | FileCheck %s --check-prefix=DXIL-SIZE + +## Verify that when dumping the DXIL part we get a valid bitcode file. 
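+## (The dump contains only the bitcode payload: the DXIL-SIZE value below is
+## the part's DXILSize of 1708, not its full Size of 1732; the remaining 24
+## bytes are the DXIL program header.)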
+# BITCODE: define void @main() +## Verify the size of the bitcode data. +# DXIL-SIZE: 1708 + +## Dump the PSV0 part and verify its size. +# RUN: llvm-objcopy --dump-section=PSV0=%t.psv0 %t.dxbc +# RUN: wc -c %t.psv0 | FileCheck %s --check-prefix=PSV0-SIZE +# RUN: od -v -Ax -t x1 %t.psv0 | FileCheck %s --check-prefix=PSV0-CONTENTS +# PSV0-SIZE: 76 + +# For a compute shader the structure size is encoded followed by a bunch of 00'd +# bytes until you get to the unused wave size min and max (0xffff), followed by +# the shader stage (5 for compute). +# TODO: Update this test to use objdump or obj2yaml once we support +# --add-section in objcopy. See issue: +# https://github.com/llvm/llvm-project/issues/162159. +# PSV0-CONTENTS: 0000 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# PSV0-CONTENTS: 0010 00 00 00 00 00 00 00 00 ff ff ff ff 05 00 00 00 + +--- !dxcontainer +Header: + Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + Version: + Major: 1 + Minor: 0 + FileSize: 1872 + PartCount: 2 + PartOffsets: [ 40, 1780 ] +Parts: + - Name: DXIL + Size: 1732 + Program: + MajorVersion: 6 + MinorVersion: 0 + ShaderKind: 5 + Size: 433 + DXILMajorVersion: 1 + DXILMinorVersion: 0 + DXILSize: 1708 + DXIL: [ 0x42, 0x43, 0xC0, 0xDE, 0x21, 0xC, 0x0, 0x0, 0xA8, + 0x1, 0x0, 0x0, 0xB, 0x82, 0x20, 0x0, 0x2, 0x0, + 0x0, 0x0, 0x13, 0x0, 0x0, 0x0, 0x7, 0x81, 0x23, + 0x91, 0x41, 0xC8, 0x4, 0x49, 0x6, 0x10, 0x32, + 0x39, 0x92, 0x1, 0x84, 0xC, 0x25, 0x5, 0x8, 0x19, + 0x1E, 0x4, 0x8B, 0x62, 0x80, 0x10, 0x45, 0x2, + 0x42, 0x92, 0xB, 0x42, 0x84, 0x10, 0x32, 0x14, + 0x38, 0x8, 0x18, 0x4B, 0xA, 0x32, 0x42, 0x88, + 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xA5, + 0x0, 0x19, 0x32, 0x42, 0xE4, 0x48, 0xE, 0x90, + 0x11, 0x22, 0xC4, 0x50, 0x41, 0x51, 0x81, 0x8C, + 0xE1, 0x83, 0xE5, 0x8A, 0x4, 0x21, 0x46, 0x6, + 0x51, 0x18, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x1B, + 0x90, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xC0, + 0x1, 0x24, 0x80, 0x2, 0x0, 0x0, 0x0, 0x49, 0x18, + 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x13, 0x82, 0x0, + 0x0, 0x89, 0x20, 0x0, 0x0, 0x11, 0x0, 0x0, 0x0, + 0x32, 0x22, 0x8, 0x9, 0x20, 0x64, 0x85, 0x4, 0x13, + 0x22, 0xA4, 0x84, 0x4, 0x13, 0x22, 0xE3, 0x84, + 0xA1, 0x90, 0x14, 0x12, 0x4C, 0x88, 0x8C, 0xB, + 0x84, 0x84, 0x4C, 0x10, 0x20, 0x73, 0x4, 0x8, + 0xC1, 0x65, 0xC3, 0x85, 0x2C, 0xE8, 0x3, 0x40, + 0x14, 0x91, 0x4E, 0xD1, 0x4A, 0x48, 0x44, 0x54, + 0x11, 0xC3, 0x9, 0x30, 0xC4, 0x18, 0x1, 0x30, + 0x2, 0x50, 0x82, 0x21, 0x1A, 0x8, 0x98, 0x23, + 0x0, 0x3, 0x0, 0x13, 0x14, 0x72, 0xC0, 0x87, 0x74, + 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x3, + 0x72, 0xC0, 0x87, 0xD, 0xAE, 0x50, 0xE, 0x6D, + 0xD0, 0xE, 0x7A, 0x50, 0xE, 0x6D, 0x0, 0xF, 0x7A, + 0x30, 0x7, 0x72, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, + 0x90, 0xE, 0x71, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, + 0x90, 0xE, 0x78, 0xA0, 0x7, 0x78, 0xD0, 0x6, 0xE9, + 0x10, 0x7, 0x76, 0xA0, 0x7, 0x71, 0x60, 0x7, 0x6D, + 0x90, 0xE, 0x73, 0x20, 0x7, 0x7A, 0x30, 0x7, 0x72, + 0xD0, 0x6, 0xE9, 0x60, 0x7, 0x74, 0xA0, 0x7, 0x76, + 0x40, 0x7, 0x6D, 0x60, 0xE, 0x71, 0x60, 0x7, 0x7A, + 0x10, 0x7, 0x76, 0xD0, 0x6, 0xE6, 0x30, 0x7, 0x72, + 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, 0x60, 0xE, 0x76, + 0x40, 0x7, 0x7A, 0x60, 0x7, 0x74, 0xD0, 0x6, 0xEE, + 0x80, 0x7, 0x7A, 0x10, 0x7, 0x76, 0xA0, 0x7, 0x73, + 0x20, 0x7, 0x7A, 0x60, 0x7, 0x74, 0x30, 0xE4, + 0x21, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x2, 0x0, + 0x0, 0x0, 0x20, 0xB, 0x4, 0x7, 0x0, 0x0, 0x0, + 0x32, 0x1E, 0x98, 0xC, 0x19, 0x11, 0x4C, 0x90, + 0x8C, 0x9, 0x26, 0x47, 0xC6, 0x4, 0x43, 0xBA, + 0x12, 0x28, 0x88, 0x62, 0x18, 0x1, 
0x28, 0x84, + 0x22, 0x0, 0x0, 0x0, 0x79, 0x18, 0x0, 0x0, 0xE5, + 0x0, 0x0, 0x0, 0x33, 0x8, 0x80, 0x1C, 0xC4, 0xE1, + 0x1C, 0x66, 0x14, 0x1, 0x3D, 0x88, 0x43, 0x38, + 0x84, 0xC3, 0x8C, 0x42, 0x80, 0x7, 0x79, 0x78, + 0x7, 0x73, 0x98, 0x71, 0xC, 0xE6, 0x0, 0xF, 0xED, + 0x10, 0xE, 0xF4, 0x80, 0xE, 0x33, 0xC, 0x42, 0x1E, + 0xC2, 0xC1, 0x1D, 0xCE, 0xA1, 0x1C, 0x66, 0x30, + 0x5, 0x3D, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1B, + 0xCC, 0x3, 0x3D, 0xC8, 0x43, 0x3D, 0x8C, 0x3, + 0x3D, 0xCC, 0x78, 0x8C, 0x74, 0x70, 0x7, 0x7B, + 0x8, 0x7, 0x79, 0x48, 0x87, 0x70, 0x70, 0x7, 0x7A, + 0x70, 0x3, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, + 0x19, 0xCC, 0x11, 0xE, 0xEC, 0x90, 0xE, 0xE1, + 0x30, 0xF, 0x6E, 0x30, 0xF, 0xE3, 0xF0, 0xE, 0xF0, + 0x50, 0xE, 0x33, 0x10, 0xC4, 0x1D, 0xDE, 0x21, + 0x1C, 0xD8, 0x21, 0x1D, 0xC2, 0x61, 0x1E, 0x66, + 0x30, 0x89, 0x3B, 0xBC, 0x83, 0x3B, 0xD0, 0x43, + 0x39, 0xB4, 0x3, 0x3C, 0xBC, 0x83, 0x3C, 0x84, + 0x3, 0x3B, 0xCC, 0xF0, 0x14, 0x76, 0x60, 0x7, + 0x7B, 0x68, 0x7, 0x37, 0x68, 0x87, 0x72, 0x68, + 0x7, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, + 0x60, 0x7, 0x76, 0x28, 0x7, 0x76, 0xF8, 0x5, 0x76, + 0x78, 0x87, 0x77, 0x80, 0x87, 0x5F, 0x8, 0x87, + 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, + 0x81, 0x2C, 0xEE, 0xF0, 0xE, 0xEE, 0xE0, 0xE, + 0xF5, 0xC0, 0xE, 0xEC, 0x30, 0x3, 0x62, 0xC8, + 0xA1, 0x1C, 0xE4, 0xA1, 0x1C, 0xCC, 0xA1, 0x1C, + 0xE4, 0xA1, 0x1C, 0xDC, 0x61, 0x1C, 0xCA, 0x21, + 0x1C, 0xC4, 0x81, 0x1D, 0xCA, 0x61, 0x6, 0xD6, + 0x90, 0x43, 0x39, 0xC8, 0x43, 0x39, 0x98, 0x43, + 0x39, 0xC8, 0x43, 0x39, 0xB8, 0xC3, 0x38, 0x94, + 0x43, 0x38, 0x88, 0x3, 0x3B, 0x94, 0xC3, 0x2F, + 0xBC, 0x83, 0x3C, 0xFC, 0x82, 0x3B, 0xD4, 0x3, + 0x3B, 0xB0, 0xC3, 0xC, 0xC7, 0x69, 0x87, 0x70, + 0x58, 0x87, 0x72, 0x70, 0x83, 0x74, 0x68, 0x7, + 0x78, 0x60, 0x87, 0x74, 0x18, 0x87, 0x74, 0xA0, + 0x87, 0x19, 0xCE, 0x53, 0xF, 0xEE, 0x0, 0xF, 0xF2, + 0x50, 0xE, 0xE4, 0x90, 0xE, 0xE3, 0x40, 0xF, 0xE1, + 0x20, 0xE, 0xEC, 0x50, 0xE, 0x33, 0x20, 0x28, + 0x1D, 0xDC, 0xC1, 0x1E, 0xC2, 0x41, 0x1E, 0xD2, + 0x21, 0x1C, 0xDC, 0x81, 0x1E, 0xDC, 0xE0, 0x1C, + 0xE4, 0xE1, 0x1D, 0xEA, 0x1, 0x1E, 0x66, 0x18, + 0x51, 0x38, 0xB0, 0x43, 0x3A, 0x9C, 0x83, 0x3B, + 0xCC, 0x50, 0x24, 0x76, 0x60, 0x7, 0x7B, 0x68, + 0x7, 0x37, 0x60, 0x87, 0x77, 0x78, 0x7, 0x78, + 0x98, 0x51, 0x4C, 0xF4, 0x90, 0xF, 0xF0, 0x50, + 0xE, 0x33, 0x1E, 0x6A, 0x1E, 0xCA, 0x61, 0x1C, + 0xE8, 0x21, 0x1D, 0xDE, 0xC1, 0x1D, 0x7E, 0x1, + 0x1E, 0xE4, 0xA1, 0x1C, 0xCC, 0x21, 0x1D, 0xF0, + 0x61, 0x6, 0x54, 0x85, 0x83, 0x38, 0xCC, 0xC3, + 0x3B, 0xB0, 0x43, 0x3D, 0xD0, 0x43, 0x39, 0xFC, + 0xC2, 0x3C, 0xE4, 0x43, 0x3B, 0x88, 0xC3, 0x3B, + 0xB0, 0xC3, 0x8C, 0xC5, 0xA, 0x87, 0x79, 0x98, + 0x87, 0x77, 0x18, 0x87, 0x74, 0x8, 0x7, 0x7A, + 0x28, 0x7, 0x72, 0x98, 0x81, 0x5C, 0xE3, 0x10, + 0xE, 0xEC, 0xC0, 0xE, 0xE5, 0x50, 0xE, 0xF3, 0x30, + 0x23, 0xC1, 0xD2, 0x41, 0x1E, 0xE4, 0xE1, 0x17, + 0xD8, 0xE1, 0x1D, 0xDE, 0x1, 0x1E, 0x66, 0x48, + 0x19, 0x3B, 0xB0, 0x83, 0x3D, 0xB4, 0x83, 0x1B, + 0x84, 0xC3, 0x38, 0x8C, 0x43, 0x39, 0xCC, 0xC3, + 0x3C, 0xB8, 0xC1, 0x39, 0xC8, 0xC3, 0x3B, 0xD4, + 0x3, 0x3C, 0xCC, 0x48, 0xB4, 0x71, 0x8, 0x7, 0x76, + 0x60, 0x7, 0x71, 0x8, 0x87, 0x71, 0x58, 0x87, + 0x19, 0xDB, 0xC6, 0xE, 0xEC, 0x60, 0xF, 0xED, + 0xE0, 0x6, 0xF0, 0x20, 0xF, 0xE5, 0x30, 0xF, 0xE5, + 0x20, 0xF, 0xF6, 0x50, 0xE, 0x6E, 0x10, 0xE, 0xE3, + 0x30, 0xE, 0xE5, 0x30, 0xF, 0xF3, 0xE0, 0x6, 0xE9, + 0xE0, 0xE, 0xE4, 0x50, 0xE, 0xF8, 0x30, 0x23, + 0xE2, 0xEC, 0x61, 0x1C, 0xC2, 0x81, 0x1D, 0xD8, + 0xE1, 0x17, 0xEC, 0x21, 0x1D, 0xE6, 0x21, 0x1D, + 0xC4, 0x21, 0x1D, 0xD8, 0x21, 0x1D, 0xE8, 0x21, 
+ 0x1F, 0x66, 0x20, 0x9D, 0x3B, 0xBC, 0x43, 0x3D, + 0xB8, 0x3, 0x39, 0x94, 0x83, 0x39, 0xCC, 0x58, + 0xBC, 0x70, 0x70, 0x7, 0x77, 0x78, 0x7, 0x7A, + 0x8, 0x7, 0x7A, 0x48, 0x87, 0x77, 0x70, 0x87, + 0x19, 0xCB, 0xE7, 0xE, 0xEF, 0x30, 0xF, 0xE1, + 0xE0, 0xE, 0xE9, 0x40, 0xF, 0xE9, 0xA0, 0xF, 0xE5, + 0x30, 0xC3, 0x1, 0x3, 0x73, 0xA8, 0x7, 0x77, 0x18, + 0x87, 0x5F, 0x98, 0x87, 0x70, 0x70, 0x87, 0x74, + 0xA0, 0x87, 0x74, 0xD0, 0x87, 0x72, 0x98, 0x81, + 0x84, 0x41, 0x39, 0xE0, 0xC3, 0x38, 0xB0, 0x43, + 0x3D, 0x90, 0x43, 0x39, 0xCC, 0x40, 0xC4, 0xA0, + 0x1D, 0xCA, 0xA1, 0x1D, 0xE0, 0x41, 0x1E, 0xDE, + 0xC1, 0x1C, 0x66, 0x24, 0x63, 0x30, 0xE, 0xE1, + 0xC0, 0xE, 0xEC, 0x30, 0xF, 0xE9, 0x40, 0xF, 0xE5, + 0x30, 0x43, 0x21, 0x83, 0x75, 0x18, 0x7, 0x73, + 0x48, 0x87, 0x5F, 0xA0, 0x87, 0x7C, 0x80, 0x87, + 0x72, 0x98, 0xB1, 0x94, 0x1, 0x3C, 0x8C, 0xC3, + 0x3C, 0x94, 0xC3, 0x38, 0xD0, 0x43, 0x3A, 0xBC, + 0x83, 0x3B, 0xCC, 0xC3, 0x8C, 0xC5, 0xC, 0x48, + 0x21, 0x15, 0x42, 0x61, 0x1E, 0xE6, 0x21, 0x1D, + 0xCE, 0xC1, 0x1D, 0x52, 0x81, 0x14, 0x66, 0x4C, + 0x67, 0x30, 0xE, 0xEF, 0x20, 0xF, 0xEF, 0xE0, + 0x6, 0xEF, 0x50, 0xF, 0xF4, 0x30, 0xF, 0xE9, 0x40, + 0xE, 0xE5, 0xE0, 0x6, 0xE6, 0x20, 0xF, 0xE1, 0xD0, + 0xE, 0xE5, 0x30, 0xA3, 0x40, 0x83, 0x76, 0x68, + 0x7, 0x79, 0x8, 0x87, 0x19, 0x52, 0x1A, 0xB8, + 0xC3, 0x3B, 0x84, 0x3, 0x3B, 0xA4, 0x43, 0x38, + 0xCC, 0x83, 0x1B, 0x84, 0x3, 0x39, 0x90, 0x83, + 0x3C, 0xCC, 0x3, 0x3C, 0x84, 0xC3, 0x38, 0x94, + 0xC3, 0xC, 0x46, 0xD, 0xC6, 0x21, 0x1C, 0xD8, + 0x81, 0x1D, 0xCA, 0xA1, 0x1C, 0x7E, 0x81, 0x1E, + 0xF2, 0x1, 0x1E, 0xCA, 0x61, 0xC6, 0xB1, 0x6, + 0xEE, 0xF0, 0xE, 0xE6, 0x20, 0xF, 0xE5, 0x50, + 0xE, 0x33, 0x1C, 0x36, 0x20, 0x7, 0x7C, 0x70, + 0x3, 0x77, 0x78, 0x7, 0x77, 0xA8, 0x7, 0x77, 0x48, + 0x7, 0x73, 0x78, 0x7, 0x79, 0x68, 0x87, 0x19, + 0x55, 0x1B, 0x90, 0x3, 0x3E, 0xB8, 0xC1, 0x38, + 0xBC, 0x83, 0x3B, 0xD0, 0x83, 0x3C, 0xBC, 0x3, + 0x3B, 0x98, 0x3, 0x3B, 0xBC, 0xC3, 0x3D, 0xB8, + 0x1, 0x3A, 0xA4, 0x83, 0x3B, 0xD0, 0xC3, 0x3C, + 0xCC, 0x58, 0xDC, 0x80, 0x1C, 0xF0, 0xC1, 0xD, + 0xE0, 0x41, 0x1E, 0xCA, 0x61, 0x1C, 0xD2, 0x61, + 0x1E, 0xCA, 0x1, 0x0, 0x79, 0x28, 0x0, 0x0, 0x52, + 0x0, 0x0, 0x0, 0xC2, 0x3C, 0x90, 0x40, 0x86, 0x10, + 0x19, 0x32, 0xE2, 0x64, 0x90, 0x40, 0x46, 0x2, + 0x19, 0x23, 0x23, 0x46, 0x2, 0x13, 0x24, 0xC6, + 0x0, 0x13, 0x74, 0xCE, 0x61, 0x8C, 0x2D, 0xCC, + 0xED, 0xC, 0xC4, 0xAE, 0x4C, 0x6E, 0x2E, 0xED, + 0xCD, 0xD, 0x44, 0x46, 0xC6, 0x5, 0xC6, 0x5, 0xE6, + 0x2C, 0x8D, 0xE, 0x4, 0xE5, 0x2C, 0x8D, 0xE, 0xE8, + 0x2C, 0x8D, 0xE, 0xAD, 0x4E, 0xCC, 0x65, 0xEC, + 0xAD, 0x4D, 0x87, 0x8D, 0xCD, 0xAE, 0xED, 0x85, + 0x8D, 0xCD, 0xAE, 0xAD, 0x5, 0x4E, 0xEE, 0x4D, + 0xAD, 0x6C, 0x8C, 0xCE, 0xE5, 0x2C, 0x8D, 0xE, + 0xA4, 0xEC, 0xC6, 0x86, 0xA6, 0x2C, 0x26, 0x7, + 0xA6, 0xAC, 0xC, 0x26, 0x26, 0xE7, 0x46, 0x6C, + 0x2C, 0xA6, 0xC, 0x66, 0xA6, 0x6C, 0xC6, 0x4C, + 0x6, 0x86, 0x6C, 0x4C, 0x6, 0x46, 0xCC, 0x66, + 0x6C, 0x2C, 0xC, 0x27, 0x46, 0x6C, 0x86, 0x6C, + 0x2C, 0xE5, 0x8, 0x63, 0x73, 0x87, 0x68, 0xB, + 0x4B, 0x73, 0x3B, 0xCA, 0xDD, 0x18, 0x5A, 0x98, + 0xDC, 0xD7, 0x5C, 0x9A, 0x5E, 0xD9, 0x69, 0xCC, + 0xE4, 0xC2, 0xDA, 0xCA, 0x5A, 0xE0, 0xDE, 0xD2, + 0xDC, 0xE8, 0xCA, 0xE4, 0x86, 0x20, 0x1C, 0xC1, + 0x10, 0x84, 0x43, 0x18, 0x82, 0x70, 0xC, 0x43, + 0x10, 0xE, 0x62, 0x8, 0x42, 0x1, 0xC, 0x41, 0x38, + 0x8A, 0x21, 0x8, 0x87, 0x31, 0x6, 0xC1, 0x38, + 0xC6, 0x10, 0x4, 0x63, 0x18, 0x4, 0x24, 0x19, + 0x83, 0x60, 0x24, 0x63, 0x18, 0xC, 0xC3, 0x18, + 0x83, 0xB0, 0x44, 0x63, 0x28, 0x94, 0x1, 0x0, + 0xA4, 0x31, 0xC, 0x6, 0xB1, 0x8C, 0x61, 0x60, + 0xA, 0xC6, 0x24, 0x64, 0x78, 
0x2E, 0x76, 0x61, + 0x6C, 0x76, 0x65, 0x72, 0x43, 0x9, 0x18, 0xA3, + 0xB0, 0xB1, 0xD9, 0xB5, 0xB9, 0xA4, 0x91, 0x95, + 0xB9, 0xD1, 0xD, 0x25, 0x68, 0x8C, 0x43, 0x86, + 0xE7, 0x32, 0x87, 0x16, 0x46, 0x56, 0x26, 0xD7, + 0xF4, 0x46, 0x56, 0xC6, 0x36, 0x94, 0xC0, 0x31, + 0xA, 0x19, 0x9E, 0x8B, 0x5D, 0x99, 0xDC, 0x5C, + 0xDA, 0x9B, 0xDB, 0x50, 0x82, 0xC7, 0x38, 0x64, + 0x78, 0x2E, 0x65, 0x6E, 0x74, 0x72, 0x79, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x73, 0x43, 0x9, 0x24, + 0x13, 0xB1, 0xB1, 0xD9, 0xB5, 0xB9, 0xB4, 0xBD, + 0x91, 0xD5, 0xB1, 0x95, 0xB9, 0x98, 0xB1, 0x85, + 0x9D, 0xCD, 0xD, 0x45, 0x98, 0x28, 0x0, 0x71, + 0x20, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x6, 0x40, + 0x30, 0x0, 0xD2, 0x0, 0x0, 0x0, 0x61, 0x20, 0x0, + 0x0, 0x6, 0x0, 0x0, 0x0, 0x13, 0x4, 0x1, 0x86, + 0x3, 0x1, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x7, 0x50, + 0x10, 0xCD, 0x14, 0x61, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0 ] + - Name: PSV0 + Size: 76 + PSVInfo: + Version: 3 + ShaderStage: 5 + MinimumWaveLaneCount: 0 + MaximumWaveLaneCount: 4294967295 + UsesViewID: 0 + SigInputVectors: 0 + SigOutputVectors: [ 0, 0, 0, 0 ] + NumThreadsX: 1 + NumThreadsY: 1 + NumThreadsZ: 1 + EntryName: main + ResourceStride: 24 + Resources: [] + SigInputElements: [] + SigOutputElements: [] + SigPatchOrPrimElements: [] + InputOutputMap: + - [ ] + - [ ] + - [ ] + - [ ] +... diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test index fd1492f..bcffd40 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test @@ -34,6 +34,7 @@ # CHECK-NEXT: { # CHECK-NEXT: ID: 0 # CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Hash: 0x0 # CHECK-NEXT: Size: 0x1 # CHECK-NEXT: HasReturn: No # CHECK-NEXT: HasTailCall: Yes @@ -50,6 +51,7 @@ # CHECK-NEXT: ID: 2 # CHECK-NEXT: Offset: 0x3 # CHECK-NEXT: Callsite End Offsets: [1, 3] +# CHECK-NEXT: Hash: 0x123 # CHECK-NEXT: Size: 0x7 # CHECK-NEXT: HasReturn: Yes # CHECK-NEXT: HasTailCall: No @@ -144,8 +146,8 @@ Sections: ShSize: [[SIZE=<none>]] Link: .text Entries: - - Version: 3 - Feature: 0x28 + - Version: 4 + Feature: 0x68 BBRanges: - BaseAddress: [[ADDR=0x11111]] BBEntries: @@ -160,6 +162,7 @@ Sections: Size: 0x4 Metadata: 0x15 CallsiteEndOffsets: [ 0x1 , 0x2 ] + Hash: 0x123 - Version: 2 BBRanges: - BaseAddress: 0x22222 diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml index dc14025..7a22efe 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml @@ -162,6 +162,92 @@ Sections: BBRanges: - BaseAddress: 0x20 +## Check that obj2yaml can dump basic block hash in the .llvm_bb_addr_map section. 
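+## The hash is present only when feature bit 0x40 (BBHash) is set in an
+## entry's Feature value; it is encoded as a fixed-width 64-bit word after
+## each BBEntry's Metadata field.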
+ +# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=BBHASH + +# BBHASH: --- !ELF +# BBHASH-NEXT: FileHeader: +# BBHASH-NEXT: Class: ELFCLASS64 +# BBHASH-NEXT: Data: ELFDATA2LSB +# BBHASH-NEXT: Type: ET_EXEC +# BBHASH-NEXT: Sections: +# BBHASH-NEXT: - Name: .llvm_bb_addr_map +# BBHASH-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# BBHASH-NEXT: Entries: +# BBHASH-NEXT: - Version: 4 +# BBHASH-NEXT: Feature: 0x40 +# BBHASH-NEXT: BBRanges: +# BBHASH-NEXT: - BBEntries: +# BBHASH-NEXT: - ID: 0 +# BBHASH-NEXT: AddressOffset: 0x1 +# BBHASH-NEXT: Size: 0x2 +# BBHASH-NEXT: Metadata: 0x3 +# BBHASH-NEXT: Hash: 0x1 +# BBHASH-NEXT: - ID: 2 +# BBHASH-NEXT: AddressOffset: 0x4 +# BBHASH-NEXT: Size: 0x5 +# BBHASH-NEXT: Metadata: 0x6 +# BBHASH-NEXT: Hash: 0x2 +# BBHASH-NEXT: - ID: 4 +# BBHASH-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7 +# BBHASH-NEXT: Size: 0xFFFFFFFFFFFFFFF8 +# BBHASH-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9 +# BBHASH-NEXT: Hash: 0x3 +# BBHASH-NEXT: - Version: 4 +# BBHASH-NEXT: Feature: 0x68 +# BBHASH-NEXT: BBRanges: +# BBHASH-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20 +# BBHASH-NEXT: BBEntries: +# BBHASH-NEXT: - ID: 6 +# BBHASH-NEXT: AddressOffset: 0xA +# BBHASH-NEXT: Size: 0xB +# BBHASH-NEXT: Metadata: 0xC +# BBHASH-NEXT: CallsiteEndOffsets: [ 0x1, 0x2 ] +# BBHASH-NEXT: Hash: 0x123 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 4 + Feature: 0x40 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 0 + AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + Hash: 0x1 + - ID: 2 + AddressOffset: 0x4 + Size: 0x5 + Metadata: 0x6 + Hash: 0x2 + - ID: 4 + AddressOffset: 0xFFFFFFFFFFFFFFF7 + Size: 0xFFFFFFFFFFFFFFF8 + Metadata: 0xFFFFFFFFFFFFFFF9 + Hash: 0x3 + - Version: 4 + Feature: 0x68 + BBRanges: + - BaseAddress: 0xFFFFFFFFFFFFFF20 + BBEntries: + - ID: 6 + AddressOffset: 0xA + Size: 0xB + Metadata: 0xC + CallsiteEndOffsets: [ 0x1, 0x2 ] + Hash: 0x123 + ## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section ## when it can't extract the entries, for example, when the section is truncated, or ## when an invalid 'NumBlocks' or 'NumBBRanges` field is specified. diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 418f90f..339e419 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -72,6 +72,13 @@ # CHECK-NEXT: 0000: 03202000 00000000 0000010E 01000203 # CHECK-NEXT: ) +# Case 10: Specify basic block hash. +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 04602000 00000000 0000010E 01000203 +# CHECK-NEXT: 0010: 23010000 00000000 +# CHECK-NEXT: ) + --- !ELF FileHeader: @@ -176,6 +183,22 @@ Sections: Metadata: 0x00000003 CallsiteEndOffsets: [] +## 10) We can produce a SHT_LLVM_BB_ADDR_MAP section with basic block hash. + - Name: '.llvm_bb_addr_map (10)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 4 + Feature: 0x60 + BBRanges: + - BaseAddress: 0x0000000000000020 + BBEntries: + - ID: 14 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + CallsiteEndOffsets: [] + Hash: 0x123 + ## Check we can't use Entries at the same time as either Content or Size. 
# RUN: not yaml2obj --docnum=2 -DCONTENT="00" %s 2>&1 | FileCheck %s --check-prefix=INVALID # RUN: not yaml2obj --docnum=2 -DSIZE="0" %s 2>&1 | FileCheck %s --check-prefix=INVALID @@ -197,7 +220,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported versions. # RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION -# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 4; encoding using the most recent version +# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version --- !ELF FileHeader: @@ -209,4 +232,4 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: ## Specify unsupported version - - Version: 4 + - Version: 5 diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index de83a0d..4c08b57 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -386,7 +386,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, // Make a DWARF transformer object and populate the ranges of the code // so we don't end up adding invalid functions to GSYM data. - DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites); + bool IsMachO = dyn_cast<object::MachOObjectFile>(&Obj) != nullptr; + + DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites, IsMachO); if (!TextRanges.empty()) Gsym.SetValidTextRanges(TextRanges); diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ab93316..9c9b2dd 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -8155,6 +8155,8 @@ void LLVMELFDumper<ELFT>::printBBAddrMaps(bool PrettyPGOAnalysis) { W.printHex("Offset", BBE.Offset); if (!BBE.CallsiteEndOffsets.empty()) W.printList("Callsite End Offsets", BBE.CallsiteEndOffsets); + if (PAM.FeatEnable.BBHash) + W.printHex("Hash", BBE.Hash); W.printHex("Size", BBE.Size); W.printBoolean("HasReturn", BBE.hasReturn()); W.printBoolean("HasTailCall", BBE.hasTailCall()); diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index ef4552f..68e18f6 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -900,7 +900,7 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { while (Cur && Cur.tell() < Content.size()) { if (Shdr->sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { Version = Data.getU8(Cur); - if (Cur && Version > 3) + if (Cur && Version > 4) return createStringError( errc::invalid_argument, "invalid SHT_LLVM_BB_ADDR_MAP section version: " + @@ -946,8 +946,11 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { } uint64_t Size = Data.getULEB128(Cur); uint64_t Metadata = Data.getULEB128(Cur); + std::optional<llvm::yaml::Hex64> Hash; + if (FeatureOrErr->BBHash) + Hash = Data.getU64(Cur); BBEntries.push_back( - {ID, Offset, Size, Metadata, std::move(CallsiteEndOffsets)}); + {ID, Offset, Size, Metadata, std::move(CallsiteEndOffsets), Hash}); } TotalNumBlocks += BBEntries.size(); BBRanges.push_back({BaseAddress, /*NumBlocks=*/{}, BBEntries}); diff --git a/llvm/unittests/ADT/TypeTraitsTest.cpp b/llvm/unittests/ADT/TypeTraitsTest.cpp index a56aa7e..f9b8d6d 100644 --- a/llvm/unittests/ADT/TypeTraitsTest.cpp +++ b/llvm/unittests/ADT/TypeTraitsTest.cpp @@ -40,9 +40,7 @@ struct Foo { struct CheckMethodPointer : CheckFunctionTraits<decltype(&Foo::func)> {}; /// Test lambda references. 
-LLVM_ATTRIBUTE_UNUSED auto lambdaFunc = [](const int &v) -> bool {
-  return true;
-};
+[[maybe_unused]] auto lambdaFunc = [](const int &v) -> bool { return true; };
 struct CheckLambda : CheckFunctionTraits<decltype(lambdaFunc)> {};
 } // end anonymous namespace
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 33f53de..d560073 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -4899,3 +4899,189 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
   for (const auto &Line : ExpectedDumpLines)
     EXPECT_TRUE(DumpStr.find(Line) != std::string::npos);
 }
+
+TEST(GSYMTest, TestUnableToLocateDWO) {
+  // Test that llvm-gsymutil will not produce "unable to locate DWO file"
+  // warnings for Apple binaries. Apple uses DW_AT_GNU_dwo_id for non-split
+  // DWARF purposes, and this makes llvm-gsymutil emit spurious warnings and
+  // errors.
+  //
+  // 0x0000000b: DW_TAG_compile_unit
+  //               DW_AT_name        ("main.cpp")
+  //               DW_AT_language    (DW_LANG_C)
+  //               DW_AT_GNU_dwo_id  (0xfffffffe)
+  StringRef yamldata = R"(
+  debug_str:
+    - ''
+    - main.cpp
+  debug_abbrev:
+    - ID: 0
+      Table:
+        - Code: 0x1
+          Tag: DW_TAG_compile_unit
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_language
+              Form: DW_FORM_udata
+            - Attribute: DW_AT_GNU_dwo_id
+              Form: DW_FORM_data4
+  debug_info:
+    - Length: 0x11
+      Version: 4
+      AbbrevTableID: 0
+      AbbrOffset: 0x0
+      AddrSize: 8
+      Entries:
+        - AbbrCode: 0x1
+          Values:
+            - Value: 0x1
+            - Value: 0x2
+            - Value: 0xFFFFFFFE
+  )";
+  auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+  ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+  std::unique_ptr<DWARFContext> DwarfContext =
+      DWARFContext::create(*ErrOrSections, 8);
+  ASSERT_TRUE(DwarfContext.get() != nullptr);
+  std::string errors;
+  raw_string_ostream OS(errors);
+  OutputAggregator OSAgg(&OS);
+  GsymCreator GC;
+  // Make a DWARF transformer that is MachO (Apple) to avoid warnings about
+  // not finding DWO files.
+  DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true);
+  const uint32_t ThreadCount = 1;
+  ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
+  ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded());
+
+  // Make sure this warning is not in the output.
+  std::string warn("warning: Unable to retrieve DWO .debug_info section for");
+  EXPECT_TRUE(errors.find(warn) == std::string::npos);
+}
+
+TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
+  // Test that if llvm-gsymutil finds a line table for a compile unit, but
+  // there are no matching entries for a function in that compile unit, it
+  // doesn't print out an error saying that a DIE has an invalid file index
+  // when there is no DW_AT_decl_file attribute.
+  //
+  // 0x0000000b: DW_TAG_compile_unit
+  //               DW_AT_name      ("main.cpp")
+  //               DW_AT_language  (DW_LANG_C)
+  //               DW_AT_stmt_list (0x00000000)
+  //
+  // 0x00000015:   DW_TAG_subprogram
+  //                 DW_AT_name    ("foo")
+  //                 DW_AT_low_pc  (0x0000000000001000)
+  //                 DW_AT_high_pc (0x0000000000001050)
+  //
+  // 0x0000002a:   NULL
+  //
+  // Line table that has entries, but none that match "foo":
+  //
+  // Address            Line   Column File   ISA Discriminator OpIndex Flags
+  // ------------------ ------ ------ ------ --- ------------- ------- -----
+  // 0x0000000000002000     10      0      1   0             0       0  is_stmt
+  // 0x0000000000002050     13      0      1   0             0       0  is_stmt
+
+  StringRef yamldata = R"(
+  debug_str:
+    - ''
+    - main.cpp
+  debug_abbrev:
+    - ID: 0
+      Table:
+        - Code: 0x1
+          Tag: DW_TAG_compile_unit
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_language
+              Form: DW_FORM_udata
+            - Attribute: DW_AT_stmt_list
+              Form: DW_FORM_sec_offset
+        - Code: 0x2
+          Tag: DW_TAG_subprogram
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_string
+            - Attribute: DW_AT_low_pc
+              Form: DW_FORM_addr
+            - Attribute: DW_AT_high_pc
+              Form: DW_FORM_addr
+  debug_info:
+    - Length: 0x27
+      Version: 4
+      AbbrevTableID: 0
+      AbbrOffset: 0x0
+      AddrSize: 8
+      Entries:
+        - AbbrCode: 0x1
+          Values:
+            - Value: 0x1
+            - Value: 0x2
+            - Value: 0x0
+        - AbbrCode: 0x2
+          Values:
+            - Value: 0xDEADBEEFDEADBEEF
+              CStr: foo
+            - Value: 0x1000
+            - Value: 0x1050
+        - AbbrCode: 0x0
+  debug_line:
+    - Length: 58
+      Version: 2
+      PrologueLength: 31
+      MinInstLength: 1
+      DefaultIsStmt: 1
+      LineBase: 251
+      LineRange: 14
+      OpcodeBase: 13
+      StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+      Files:
+        - Name: main.cpp
+          DirIdx: 0
+          ModTime: 0
+          Length: 0
+      Opcodes:
+        - Opcode: DW_LNS_extended_op
+          ExtLen: 9
+          SubOpcode: DW_LNE_set_address
+          Data: 8192
+        - Opcode: DW_LNS_advance_line
+          SData: 9
+          Data: 0
+        - Opcode: DW_LNS_copy
+          Data: 0
+        - Opcode: DW_LNS_advance_pc
+          Data: 80
+        - Opcode: DW_LNS_advance_line
+          SData: 3
+          Data: 0
+        - Opcode: DW_LNS_extended_op
+          ExtLen: 1
+          SubOpcode: DW_LNE_end_sequence
+          Data: 0
+  )";
+  auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+  ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+  std::unique_ptr<DWARFContext> DwarfContext =
+      DWARFContext::create(*ErrOrSections, 8);
+  ASSERT_TRUE(DwarfContext.get() != nullptr);
+  std::string errors;
+  raw_string_ostream OS(errors);
+  OutputAggregator OSAgg(&OS);
+  GsymCreator GC;
+  DwarfTransformer DT(*DwarfContext, GC);
+  const uint32_t ThreadCount = 1;
+  ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
+  ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded());
+
+  // Make sure this error is not in the output.
+  std::string error_str("error: function DIE at 0x00000015 has an invalid file "
+                        "index 4294967295 in its DW_AT_decl_file attribute");
+  EXPECT_TRUE(errors.find(error_str) == std::string::npos);
+}
diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp
index 54c7ddd..6376165 100644
--- a/llvm/unittests/IR/ConstantsTest.cpp
+++ b/llvm/unittests/IR/ConstantsTest.cpp
@@ -564,13 +564,17 @@ TEST(ConstantsTest, FoldGlobalVariablePtr) {
   Global->setAlignment(Align(4));
-  ConstantInt *TheConstant(ConstantInt::get(IntType, 2));
+  ConstantInt *TheConstant = ConstantInt::get(IntType, 2);
-  Constant *TheConstantExpr(ConstantExpr::getPtrToInt(Global.get(), IntType));
+  Constant *PtrToInt = ConstantExpr::getPtrToInt(Global.get(), IntType);
+  ASSERT_TRUE(
+
ConstantFoldBinaryInstruction(Instruction::And, PtrToInt, TheConstant) + ->isNullValue()); - ASSERT_TRUE(ConstantFoldBinaryInstruction(Instruction::And, TheConstantExpr, - TheConstant) - ->isNullValue()); + Constant *PtrToAddr = ConstantExpr::getPtrToAddr(Global.get(), IntType); + ASSERT_TRUE( + ConstantFoldBinaryInstruction(Instruction::And, PtrToAddr, TheConstant) + ->isNullValue()); } // Check that containsUndefOrPoisonElement and containsPoisonElement is working diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index 17d9f50..d6a3ca5 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -531,7 +531,7 @@ Sections: // Check that we can detect unsupported versions. SmallString<128> UnsupportedVersionYamlString(CommonYamlString); UnsupportedVersionYamlString += R"( - - Version: 4 + - Version: 5 BBRanges: - BaseAddress: 0x11111 BBEntries: @@ -543,7 +543,7 @@ Sections: { SCOPED_TRACE("unsupported version"); DoCheck(UnsupportedVersionYamlString, - "unsupported SHT_LLVM_BB_ADDR_MAP version: 4"); + "unsupported SHT_LLVM_BB_ADDR_MAP version: 5"); } SmallString<128> ZeroBBRangesYamlString(CommonYamlString); @@ -761,14 +761,14 @@ Sections: BBAddrMap E1 = { {{0x11111, - {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}}}}}}; + {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}, 0}}}}}; BBAddrMap E2 = { - {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}, - {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}}}}}}; + {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}, + {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}, 0}}}}}; BBAddrMap E3 = { - {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}}}}}}; + {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}}}}}; BBAddrMap E4 = { - {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}}}}}}; + {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}}}}}; std::vector<BBAddrMap> Section0BBAddrMaps = {E4}; std::vector<BBAddrMap> Section1BBAddrMaps = {E3}; @@ -988,6 +988,123 @@ Sections: } } +// Test for the ELFObjectFile::readBBAddrMap API with BBHash. 
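The aggregate initializers above show each decoded BBEntry gaining a trailing hash value (feature bit 0b100'0000) next to the callsite end offsets. Before the test body that follows, here is a minimal consumption sketch, not part of this patch; the member names `BBRanges`, `BBEntries`, `ID`, and `Hash` are inferred from the initializers in this test, and `Obj` is assumed to be an `ELFObjectFile<ELF64LE>`:

// Hedged sketch: read the BB address maps back and print each block's hash.
Expected<std::vector<BBAddrMap>> MapsOrErr = Obj.readBBAddrMap();
if (!MapsOrErr)
  report_fatal_error(MapsOrErr.takeError());
for (const BBAddrMap &Map : *MapsOrErr)
  for (const auto &Range : Map.BBRanges)        // one entry per basic-block range
    for (const auto &BB : Range.BBEntries)      // one entry per basic block
      outs() << "BB " << BB.ID << " hash 0x" << Twine::utohexstr(BB.Hash)
             << "\n";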
+TEST(ELFObjectFileTest, ReadBBHash) {
+ StringRef CommonYamlString(R"(
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+Sections:
+ - Name: .llvm_bb_addr_map_1
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: 1
+ Entries:
+ - Version: 4
+ Feature: 0x60
+ BBRanges:
+ - BaseAddress: 0x11111
+ BBEntries:
+ - ID: 1
+ AddressOffset: 0x0
+ Size: 0x1
+ Metadata: 0x2
+ CallsiteEndOffsets: [ 0x1 , 0x1 ]
+ Hash: 0x1
+ - Name: .llvm_bb_addr_map_2
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: 1
+ Entries:
+ - Version: 4
+ Feature: 0x48
+ BBRanges:
+ - BaseAddress: 0x22222
+ BBEntries:
+ - ID: 2
+ AddressOffset: 0x0
+ Size: 0x2
+ Metadata: 0x4
+ Hash: 0x2
+ - BaseAddress: 0xFFFFF
+ BBEntries:
+ - ID: 15
+ AddressOffset: 0xF0
+ Size: 0xF1
+ Metadata: 0x1F
+ Hash: 0xF
+ - Name: .llvm_bb_addr_map_3
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: 2
+ Entries:
+ - Version: 4
+ Feature: 0x40
+ BBRanges:
+ - BaseAddress: 0x33333
+ BBEntries:
+ - ID: 0
+ AddressOffset: 0x0
+ Size: 0x3
+ Metadata: 0x6
+ Hash: 0x3
+ - Name: .llvm_bb_addr_map_4
+ Type: SHT_LLVM_BB_ADDR_MAP
+ # Link: 0 (by default, can be overridden)
+ Entries:
+ - Version: 4
+ Feature: 0x40
+ BBRanges:
+ - BaseAddress: 0x44444
+ BBEntries:
+ - ID: 0
+ AddressOffset: 0x0
+ Size: 0x4
+ Metadata: 0x18
+ Hash: 0x4
+)");
+
+ BBAddrMap E1 = {
+ {{0x11111,
+ {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}, 0x1}}}}};
+ BBAddrMap E2 = {
+ {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0x2}}},
+ {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}, 0xF}}}}};
+ BBAddrMap E3 = {
+ {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0x3}}}}};
+ BBAddrMap E4 = {
+ {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0x4}}}}};
+
+ std::vector<BBAddrMap> Section0BBAddrMaps = {E4};
+ std::vector<BBAddrMap> Section1BBAddrMaps = {E3};
+ std::vector<BBAddrMap> Section2BBAddrMaps = {E1, E2};
+ std::vector<BBAddrMap> AllBBAddrMaps = {E1, E2, E3, E4};
+
+ auto DoCheckSucceeds = [&](StringRef YamlString,
+ std::optional<unsigned> TextSectionIndex,
+ std::vector<BBAddrMap> ExpectedResult) {
+ SCOPED_TRACE("for TextSectionIndex: " +
+ (TextSectionIndex ? llvm::Twine(*TextSectionIndex) : "{}") +
+ " and object yaml:\n" + YamlString);
+ SmallString<0> Storage;
+ Expected<ELFObjectFile<ELF64LE>> ElfOrErr =
+ toBinary<ELF64LE>(Storage, YamlString);
+ ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded());
+
+ Expected<const typename ELF64LE::Shdr *> BBAddrMapSecOrErr =
+ ElfOrErr->getELFFile().getSection(1);
+ ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded());
+ auto BBAddrMaps = ElfOrErr->readBBAddrMap(TextSectionIndex);
+ ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded());
+ EXPECT_EQ(*BBAddrMaps, ExpectedResult);
+ };
+
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt,
+ AllBBAddrMaps);
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps);
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps);
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps);
+}
+
// Test for the ELFObjectFile::readBBAddrMap API with PGOAnalysisMap.
TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { StringRef CommonYamlString(R"( @@ -1159,29 +1276,32 @@ Sections: )"); BBAddrMap E1 = { - {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}}}}}}; - PGOAnalysisMap P1 = {892, {}, {true, false, false, false, false, false}}; + {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}, 0}}}}}; + PGOAnalysisMap P1 = { + 892, {}, {true, false, false, false, false, false, false}}; BBAddrMap E2 = { - {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}}}; + {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; PGOAnalysisMap P2 = {{}, {{BlockFrequency(343), {}}}, - {false, true, false, false, false, false}}; - BBAddrMap E3 = {{{0x33333, - {{0, 0x0, 0x3, {false, true, true, false, false}, {}}, - {1, 0x3, 0x3, {false, false, true, false, false}, {}}, - {2, 0x6, 0x3, {false, false, false, false, false}, {}}}}}}; + {false, true, false, false, false, false, false}}; + BBAddrMap E3 = { + {{0x33333, + {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}, + {1, 0x3, 0x3, {false, false, true, false, false}, {}, 0}, + {2, 0x6, 0x3, {false, false, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P3 = {{}, {{{}, {{1, BranchProbability::getRaw(0x1111'1111)}, {2, BranchProbability::getRaw(0xeeee'eeee)}}}, {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, {{}, {}}}, - {false, false, true, false, false, false}}; - BBAddrMap E4 = {{{0x44444, - {{0, 0x0, 0x4, {false, false, false, true, true}, {}}, - {1, 0x4, 0x4, {false, false, false, false, false}, {}}, - {2, 0x8, 0x4, {false, false, false, false, false}, {}}, - {3, 0xc, 0x4, {false, false, false, false, false}, {}}}}}}; + {false, false, true, false, false, false, false}}; + BBAddrMap E4 = { + {{0x44444, + {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}, + {1, 0x4, 0x4, {false, false, false, false, false}, {}, 0}, + {2, 0x8, 0x4, {false, false, false, false, false}, {}, 0}, + {3, 0xc, 0x4, {false, false, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P4 = { 1000, {{BlockFrequency(1000), @@ -1193,22 +1313,24 @@ Sections: {3, BranchProbability::getRaw(0xeeee'eeee)}}}, {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, {BlockFrequency(1000), {}}}, - {true, true, true, false, false, false}}; + {true, true, true, false, false, false, false}}; BBAddrMap E5 = { - {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}}}; - PGOAnalysisMap P5 = {{}, {}, {false, false, false, false, false, false}}; + {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; + PGOAnalysisMap P5 = { + {}, {}, {false, false, false, false, false, false, false}}; BBAddrMap E6 = { {{0x66666, - {{0, 0x0, 0x6, {false, true, true, false, false}, {}}, - {1, 0x6, 0x6, {false, false, true, false, false}, {}}}}, - {0x666661, {{2, 0x0, 0x6, {false, false, false, false, false}, {}}}}}}; + {{0, 0x0, 0x6, {false, true, true, false, false}, {}, 0}, + {1, 0x6, 0x6, {false, false, true, false, false}, {}, 0}}}, + {0x666661, + {{2, 0x0, 0x6, {false, false, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P6 = {{}, {{{}, {{1, BranchProbability::getRaw(0x2222'2222)}, {2, BranchProbability::getRaw(0xcccc'cccc)}}}, {{}, {{2, BranchProbability::getRaw(0x8888'8888)}}}, {{}, {}}}, - {false, false, true, true, false, false}}; + {false, false, true, true, false, false, false}}; std::vector<BBAddrMap> Section0BBAddrMaps = {E4, E5, E6}; std::vector<BBAddrMap> Section1BBAddrMaps = {E3}; diff --git a/llvm/unittests/Object/ELFTypesTest.cpp 
b/llvm/unittests/Object/ELFTypesTest.cpp index f88931b5f..1765e15 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -101,21 +101,22 @@ static_assert( "PGOAnalysisMap should use the same type for basic block ID as BBAddrMap"); TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { - const std::array<BBAddrMap::Features, 11> Decoded = { - {{false, false, false, false, false, false}, - {true, false, false, false, false, false}, - {false, true, false, false, false, false}, - {false, false, true, false, false, false}, - {false, false, false, true, false, false}, - {true, true, false, false, false, false}, - {false, true, true, false, false, false}, - {false, true, true, true, false, false}, - {true, true, true, true, false, false}, - {false, false, false, false, true, false}, - {false, false, false, false, false, true}}}; - const std::array<uint8_t, 11> Encoded = {{0b0000, 0b0001, 0b0010, 0b0100, - 0b1000, 0b0011, 0b0110, 0b1110, - 0b1111, 0b1'0000, 0b10'0000}}; + const std::array<BBAddrMap::Features, 12> Decoded = { + {{false, false, false, false, false, false, false}, + {true, false, false, false, false, false, false}, + {false, true, false, false, false, false, false}, + {false, false, true, false, false, false, false}, + {false, false, false, true, false, false, false}, + {true, true, false, false, false, false, false}, + {false, true, true, false, false, false, false}, + {false, true, true, true, false, false, false}, + {true, true, true, true, false, false, false}, + {false, false, false, false, true, false, false}, + {false, false, false, false, false, true, false}, + {false, false, false, false, false, false, true}}}; + const std::array<uint8_t, 12> Encoded = { + {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111, + 0b1'0000, 0b10'0000, 0b100'0000}}; for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) EXPECT_EQ(Feat.encode(), EncodedVal); for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) { @@ -128,9 +129,9 @@ TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) { const std::array<std::string, 2> Errors = { - "invalid encoding for BBAddrMap::Features: 0x40", + "invalid encoding for BBAddrMap::Features: 0x80", "invalid encoding for BBAddrMap::Features: 0xf0"}; - const std::array<uint8_t, 2> Values = {{0b100'0000, 0b1111'0000}}; + const std::array<uint8_t, 2> Values = {{0b1000'0000, 0b1111'0000}}; for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(), FailedWithMessage(Error)); diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp index 5ac4c53..809960d 100644 --- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp @@ -228,6 +228,114 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_SUB) { EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u); } +TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_ADD) { + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto Nneg1 = DAG->getConstant(0xFF, Loc, IntVT); + auto N0 = DAG->getConstant(0x00, Loc, IntVT); + auto N1 = DAG->getConstant(0x01, Loc, IntVT); + auto N5 = DAG->getConstant(0x05, Loc, IntVT); + auto N8 = DAG->getConstant(0x08, Loc, IntVT); + auto Nsign1 = DAG->getConstant(0x55, Loc, IntVT); + auto UnknownOp = DAG->getRegister(0, IntVT); + auto Mask = 
DAG->getConstant(0x1e, Loc, IntVT); + auto Nsign3 = DAG->getNode(ISD::AND, Loc, IntVT, Mask, UnknownOp); + // RHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpRhsEo = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, Nsign1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpRhsEo), 1u); + + // ADD 0 -1 + // N0 = 00000000 + // Nneg1 = 11111111 + auto OpNegZero = DAG->getNode(ISD::ADD, Loc, IntVT, N0, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegZero), 8u); + + // ADD 1 -1 + // N1 = 00000001 + // Nneg1 = 11111111 + auto OpNegOne = DAG->getNode(ISD::ADD, Loc, IntVT, N1, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegOne), 8u); + + // ADD 8 -1 + // N8 = 00001000 + // Nneg1 = 11111111 + auto OpSeven = DAG->getNode(ISD::ADD, Loc, IntVT, N8, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpSeven), 5u); + + // Non negative + // Nsign3 = 000????0 + // Nneg1 = 11111111 + auto OpNonNeg = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNonNeg), 3u); + + // LHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpLhsEo = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign1, Nsign3); + EXPECT_EQ(DAG->ComputeNumSignBits(OpLhsEo), 1u); + + // Nsign3 = 000????0 + // N5 = 00000101 + auto Op = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, N5); + EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u); +} + +TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_ADDC) { + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto Nneg1 = DAG->getConstant(0xFF, Loc, IntVT); + auto N0 = DAG->getConstant(0x00, Loc, IntVT); + auto N1 = DAG->getConstant(0x01, Loc, IntVT); + auto N5 = DAG->getConstant(0x05, Loc, IntVT); + auto N8 = DAG->getConstant(0x08, Loc, IntVT); + auto Nsign1 = DAG->getConstant(0x55, Loc, IntVT); + auto UnknownOp = DAG->getRegister(0, IntVT); + auto Mask = DAG->getConstant(0x1e, Loc, IntVT); + auto Nsign3 = DAG->getNode(ISD::AND, Loc, IntVT, Mask, UnknownOp); + // RHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpRhsEo = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, Nsign1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpRhsEo), 1u); + + // ADD 0 -1 + // N0 = 00000000 + // Nneg1 = 11111111 + auto OpNegZero = DAG->getNode(ISD::ADDC, Loc, IntVT, N0, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegZero), 8u); + + // ADD 1 -1 + // N1 = 00000001 + // Nneg1 = 11111111 + auto OpNegOne = DAG->getNode(ISD::ADDC, Loc, IntVT, N1, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNegOne), 8u); + + // ADD 8 -1 + // N8 = 00001000 + // Nneg1 = 11111111 + auto OpSeven = DAG->getNode(ISD::ADDC, Loc, IntVT, N8, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpSeven), 4u); + + // Non negative + // Nsign3 = 000????0 + // Nneg1 = 11111111 + auto OpNonNeg = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, Nneg1); + EXPECT_EQ(DAG->ComputeNumSignBits(OpNonNeg), 3u); + + // LHS early out + // Nsign1 = 01010101 + // Nsign3 = 000????0 + auto OpLhsEo = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign1, Nsign3); + EXPECT_EQ(DAG->ComputeNumSignBits(OpLhsEo), 1u); + + // Nsign3 = 000????0 + // N5 = 00000101 + auto Op = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, N5); + EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u); +} + TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) { TargetLowering TL(*TM); diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index b4d816e..3c6ff11 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -266,10 +266,9 @@ static void 
emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
 return;
 StringRef Lang = DirLang.getName();
+ IncludeGuardEmitter IncGuard(OS, (Twine("LLVM_") + Lang + "_INC").str());
- OS << "#ifndef LLVM_" << Lang << "_INC\n";
- OS << "#define LLVM_" << Lang << "_INC\n";
- OS << "\n#include \"llvm/ADT/ArrayRef.h\"\n";
+ OS << "#include \"llvm/ADT/ArrayRef.h\"\n";
 if (DirLang.hasEnableBitmaskEnumInNamespace())
 OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n";
@@ -370,7 +369,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
 OS << "};\n";
 }
 LlvmNS.close();
- OS << "#endif // LLVM_" << Lang << "_INC\n";
}
// Given a list of spellings (for a given clause/directive), order them
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 343c2bb71..3f8be5e 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -906,7 +906,6 @@ Transforms/InstCombine/select_frexp.ll
 Transforms/InstCombine/select.ll
 Transforms/InstCombine/select-min-max.ll
 Transforms/InstCombine/select-of-symmetric-selects.ll
-Transforms/InstCombine/select-safe-impliedcond-transforms.ll
 Transforms/InstCombine/select-safe-transforms.ll
 Transforms/InstCombine/select-select.ll
 Transforms/InstCombine/select-with-extreme-eq-cond.ll
@@ -1237,7 +1236,6 @@ Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
 Transforms/PGOProfile/chr-dead-pred.ll
 Transforms/PGOProfile/chr-dup-threshold.ll
 Transforms/PGOProfile/chr-lifetimes.ll
-Transforms/PGOProfile/chr.ll
 Transforms/PGOProfile/chr-poison.ll
 Transforms/PGOProfile/comdat.ll
 Transforms/PGOProfile/memop_profile_funclet_wasm.ll
@@ -1312,17 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SROA/addrspacecast.ll
-Transforms/SROA/phi-and-select.ll
-Transforms/SROA/phi-gep.ll
-Transforms/SROA/scalable-vectors-with-known-vscale.ll
-Transforms/SROA/select-gep.ll
-Transforms/SROA/select-load.ll
-Transforms/SROA/slice-width.ll
-Transforms/SROA/std-clamp.ll
-Transforms/SROA/vector-conversion.ll
-Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
-Transforms/SROA/vector-promotion.ll
 Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
 Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
 Transforms/StructurizeCFG/hoist-zerocost.ll
diff --git a/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h
new file mode 100644
index 0000000..72ac247
--- /dev/null
+++ b/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h
@@ -0,0 +1,54 @@
+//===- StridedMetadataRangeAnalysis.h - Strided metadata ranges -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
+#define MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
+
+#include "mlir/Analysis/DataFlow/SparseAnalysis.h"
+#include "mlir/Interfaces/InferStridedMetadataInterface.h"
+
+namespace mlir {
+namespace dataflow {
+
+/// This lattice element represents the strided metadata of an SSA value.
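Before the class definitions that follow, a hedged usage sketch: the analysis declared below is loaded together with the analyses it depends on (per its class comment), then queried through the solver. `rootOp` and `memrefValue` are assumed names for the operation under analysis and one of its memref SSA values:

// Hedged sketch: run the strided metadata range analysis and query a value.
DataFlowSolver solver;
solver.load<dataflow::DeadCodeAnalysis>();
solver.load<dataflow::SparseConstantPropagation>();
solver.load<dataflow::IntegerRangeAnalysis>();
solver.load<dataflow::StridedMetadataRangeAnalysis>();
if (failed(solver.initializeAndRun(rootOp)))
  return;
const auto *lattice =
    solver.lookupState<dataflow::StridedMetadataRangeLattice>(memrefValue);
if (lattice && !lattice->getValue().isUninitialized())
  llvm::dbgs() << lattice->getValue() << "\n";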
+class StridedMetadataRangeLattice : public Lattice<StridedMetadataRange> { +public: + using Lattice::Lattice; +}; + +/// Strided metadata range analysis determines the strided metadata ranges of +/// SSA values using operations that define `InferStridedMetadataInterface`. +/// +/// This analysis depends on DeadCodeAnalysis, SparseConstantPropagation, and +/// IntegerRangeAnalysis, and will be a silent no-op if the analyses are not +/// loaded in the same solver context. +class StridedMetadataRangeAnalysis + : public SparseForwardDataFlowAnalysis<StridedMetadataRangeLattice> { +public: + StridedMetadataRangeAnalysis(DataFlowSolver &solver, + int32_t indexBitwidth = 64); + + /// At an entry point, we cannot reason about strided metadata ranges unless + /// the type also encodes the data. For example, a memref with static layout. + void setToEntryState(StridedMetadataRangeLattice *lattice) override; + + /// Visit an operation. Invoke the transfer function on each operation that + /// implements `InferStridedMetadataInterface`. + LogicalResult + visitOperation(Operation *op, + ArrayRef<const StridedMetadataRangeLattice *> operands, + ArrayRef<StridedMetadataRangeLattice *> results) override; + +private: + /// Index bitwidth to use when operating with the int-ranges. + int32_t indexBitwidth = 64; +}; +} // namespace dataflow +} // end namespace mlir + +#endif // MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h index 30f33ed..69447f7 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h @@ -17,6 +17,7 @@ #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferIntRangeInterface.h" +#include "mlir/Interfaces/InferStridedMetadataInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/MemOpInterfaces.h" #include "mlir/Interfaces/MemorySlotInterfaces.h" diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 89bd0f1..b39207f 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -14,6 +14,7 @@ include "mlir/Dialect/MemRef/IR/MemRefBase.td" include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferIntRangeInterface.td" +include "mlir/Interfaces/InferStridedMetadataInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/MemOpInterfaces.td" include "mlir/Interfaces/MemorySlotInterfaces.td" @@ -2085,6 +2086,7 @@ def MemRef_StoreOp : MemRef_Op<"store", def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>, + DeclareOpInterfaceMethods<InferStridedMetadataOpInterface>, DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, DeclareOpInterfaceMethods<ViewLikeOpInterface>, AttrSizedOperandSegments, diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 77e833f..fecf81b 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1316,6 +1316,24 @@ def OpenACC_PrivateRecipeOp }]; let hasRegionVerifier = 1; + + let extraClassDeclaration = [{ + /// Creates a PrivateRecipeOp and populates its regions based on the + /// variable type 
as long as the type implements MappableType or + /// PointerLikeType interface. If a type implements both, the MappableType + /// API will be preferred. Returns std::nullopt if the recipe cannot be + /// created or populated. The builder's current insertion point will be used + /// and it must be a valid place for this operation to be inserted. The + /// `recipeName` must be a unique name to prevent "redefinition of symbol" + /// IR errors. + static std::optional<PrivateRecipeOp> createAndPopulate( + ::mlir::OpBuilder &builder, + ::mlir::Location loc, + ::llvm::StringRef recipeName, + ::mlir::Type varType, + ::llvm::StringRef varName = "", + ::mlir::ValueRange bounds = {}); + }]; } //===----------------------------------------------------------------------===// @@ -1410,6 +1428,24 @@ def OpenACC_FirstprivateRecipeOp }]; let hasRegionVerifier = 1; + + let extraClassDeclaration = [{ + /// Creates a FirstprivateRecipeOp and populates its regions based on the + /// variable type as long as the type implements MappableType or + /// PointerLikeType interface. If a type implements both, the MappableType + /// API will be preferred. Returns std::nullopt if the recipe cannot be + /// created or populated. The builder's current insertion point will be used + /// and it must be a valid place for this operation to be inserted. The + /// `recipeName` must be a unique name to prevent "redefinition of symbol" + /// IR errors. + static std::optional<FirstprivateRecipeOp> createAndPopulate( + ::mlir::OpBuilder &builder, + ::mlir::Location loc, + ::llvm::StringRef recipeName, + ::mlir::Type varType, + ::llvm::StringRef varName = "", + ::mlir::ValueRange bounds = {}); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index 0d16255..6736bc8 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -83,7 +83,15 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { The `originalVar` parameter is optional but enables support for dynamic types (e.g., dynamic memrefs). When provided, implementations can extract runtime dimension information from the original variable to create - allocations with matching dynamic sizes. + allocations with matching dynamic sizes. When generating recipe bodies, + `originalVar` should be the block argument representing the original + variable in the recipe region. + + The `needsFree` output parameter indicates whether the allocated memory + requires explicit deallocation. Implementations should set this to true + for heap allocations that need a matching deallocation operation (e.g., + alloc) and false for stack-based allocations (e.g., alloca). During + recipe generation, this determines whether a destroy region is created. Returns a Value representing the result of the allocation. If no value is returned, it means the allocation was not successfully generated. 
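Taken together with the recipe helpers added later in this patch, the intended caller-side pattern looks roughly as follows. This is a hedged sketch, not the patch's code; `builder`, `loc`, `varName`, `varType`, `blockArgVar` (the init-region argument), and `destroyBlockArg` (the destroy-region argument) are assumed to be in scope, and `genFree` is the deallocation hook amended in the next hunk:

// Hedged sketch: allocate via the interface, and only emit a destroy region
// when the allocation reports that it must be freed.
bool needsFree = false;
mlir::Value privatized = pointerLikeTy.genAllocate(builder, loc, varName,
                                                   varType, blockArgVar,
                                                   needsFree);
if (!privatized)
  return failure();
if (needsFree) {
  // In the destroy region: free the block argument carrying the privatized
  // value; the init-region result is passed as allocRes so the dialect can
  // inspect the original allocation to choose a deallocation strategy.
  pointerLikeTy.genFree(
      builder, loc,
      cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(destroyBlockArg),
      /*allocRes=*/privatized, varType);
}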
@@ -94,7 +102,8 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { "::mlir::Location":$loc, "::llvm::StringRef":$varName, "::mlir::Type":$varType, - "::mlir::Value":$originalVar), + "::mlir::Value":$originalVar, + "bool &":$needsFree), /*methodBody=*/"", /*defaultImplementation=*/[{ return {}; @@ -102,23 +111,34 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { >, InterfaceMethod< /*description=*/[{ - Generates deallocation operations for the pointer-like type. It deallocates - the instance provided. + Generates deallocation operations for the pointer-like type. - The `varPtr` parameter is required and must represent an instance that was - previously allocated. If the current type is represented in a way that it - does not capture the pointee type, `varType` must be passed in to provide - the necessary type information. Nothing is generated in case the allocate - is `alloca`-like. + The `varToFree` parameter is required and must represent an instance + that was previously allocated. When generating recipe bodies, this + should be the block argument representing the private variable in the + destroy region. + + The `allocRes` parameter is optional and provides the result of the + corresponding allocation from the init region. This allows implementations + to inspect the allocation operation to determine the appropriate + deallocation strategy. This is necessary because in recipe generation, + the allocation and deallocation occur in separate regions. Dialects that + use only one allocation type or can determine deallocation from type + information alone may ignore this parameter. - Returns true if deallocation was successfully generated or successfully - deemed as not needed to be generated, false otherwise. + The `varType` parameter must be provided if the current type does not + capture the pointee type information. No deallocation is generated for + stack-based allocations (e.g., alloca). + + Returns true if deallocation was successfully generated or determined to + be unnecessary, false otherwise. }], /*retTy=*/"bool", /*methodName=*/"genFree", /*args=*/(ins "::mlir::OpBuilder &":$builder, "::mlir::Location":$loc, - "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varToFree, + "::mlir::Value":$allocRes, "::mlir::Type":$varType), /*methodBody=*/"", /*defaultImplementation=*/[{ @@ -274,6 +294,14 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { The `initVal` can be empty - it is primarily needed for reductions to ensure the variable is also initialized with appropriate value. + The `needsDestroy` out-parameter is set by implementations to indicate + that destruction code must be generated after the returned private + variable usages, typically in the destroy region of recipe operations + (for example, when heap allocations or temporaries requiring cleanup + are created during initialization). When `needsDestroy` is set, callers + should invoke `generatePrivateDestroy` in the recipe's destroy region + with the privatized value returned by this method. + If the return value is empty, it means that recipe body was not successfully generated. 
}], @@ -284,12 +312,38 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { "::mlir::TypedValue<::mlir::acc::MappableType>":$var, "::llvm::StringRef":$varName, "::mlir::ValueRange":$extents, - "::mlir::Value":$initVal), + "::mlir::Value":$initVal, + "bool &":$needsDestroy), /*methodBody=*/"", /*defaultImplementation=*/[{ return {}; }] >, + InterfaceMethod< + /*description=*/[{ + Generates destruction operations for a privatized value previously + produced by `generatePrivateInit`. This is typically inserted in a + recipe's destroy region, after all uses of the privatized value. + + The `privatized` value is the SSA value yielded by the init region + (and passed as the privatized argument to the destroy region). + Implementations should free heap-allocated storage or perform any + cleanup required for the given type. If no destruction is required, + this function should be a no-op and return `true`. + + Returns true if destruction was successfully generated or deemed not + necessary, false otherwise. + }], + /*retTy=*/"bool", + /*methodName=*/"generatePrivateDestroy", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::Value":$privatized), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return true; + }] + >, ]; } diff --git a/mlir/include/mlir/IR/Remarks.h b/mlir/include/mlir/IR/Remarks.h index 20e84ec..9877926 100644 --- a/mlir/include/mlir/IR/Remarks.h +++ b/mlir/include/mlir/IR/Remarks.h @@ -18,7 +18,6 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Regex.h" -#include <optional> #include "mlir/IR/Diagnostics.h" #include "mlir/IR/MLIRContext.h" @@ -144,7 +143,7 @@ public: llvm::StringRef getCategoryName() const { return categoryName; } - llvm::StringRef getFullCategoryName() const { + llvm::StringRef getCombinedCategoryName() const { if (categoryName.empty() && subCategoryName.empty()) return {}; if (subCategoryName.empty()) @@ -318,7 +317,7 @@ private: }; //===----------------------------------------------------------------------===// -// MLIR Remark Streamer +// Pluggable Remark Utilities //===----------------------------------------------------------------------===// /// Base class for MLIR remark streamers that is used to stream @@ -338,6 +337,26 @@ public: virtual void finalize() {} // optional }; +using ReportFn = llvm::unique_function<void(const Remark &)>; + +/// Base class for MLIR remark emitting policies that is used to emit +/// optimization remarks to the underlying remark streamer. The derived classes +/// should implement the `reportRemark` method to provide the actual emitting +/// implementation. +class RemarkEmittingPolicyBase { +protected: + ReportFn reportImpl; + +public: + RemarkEmittingPolicyBase() = default; + virtual ~RemarkEmittingPolicyBase() = default; + + void initialize(ReportFn fn) { reportImpl = std::move(fn); } + + virtual void reportRemark(const Remark &remark) = 0; + virtual void finalize() = 0; +}; + //===----------------------------------------------------------------------===// // Remark Engine (MLIR Context will own this class) //===----------------------------------------------------------------------===// @@ -355,6 +374,8 @@ private: std::optional<llvm::Regex> failedFilter; /// The MLIR remark streamer that will be used to emit the remarks. std::unique_ptr<MLIRRemarkStreamerBase> remarkStreamer; + /// The MLIR remark policy that will be used to emit the remarks. 
+ std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy;
 /// When enabled, the engine also prints remarks as mlir::emitRemarks.
 bool printAsEmitRemarks = false;
@@ -392,6 +413,8 @@ private:
 InFlightRemark emitIfEnabled(Location loc, RemarkOpts opts,
 bool (RemarkEngine::*isEnabled)(StringRef) const);
+ /// Report a remark.
+ void reportImpl(const Remark &remark);
 public:
 /// Default constructor is deleted, use the other constructor.
@@ -407,8 +430,15 @@ public:
 ~RemarkEngine();
 /// Setup the remark engine with the given output path and format.
- LogicalResult initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer,
- std::string *errMsg);
+ LogicalResult
+ initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer,
+ std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy,
+ std::string *errMsg);
+
+ /// Get the remark emitting policy.
+ RemarkEmittingPolicyBase *getRemarkEmittingPolicy() const {
+ return remarkEmittingPolicy.get();
+ }
 /// Report a remark.
 void report(const Remark &&remark);
@@ -446,6 +476,46 @@ inline InFlightRemark withEngine(Fn fn, Location loc, Args &&...args) {
 namespace mlir::remark {
+//===----------------------------------------------------------------------===//
+// Remark Emitting Policies
+//===----------------------------------------------------------------------===//
+
+/// Policy that emits all remarks.
+class RemarkEmittingPolicyAll : public detail::RemarkEmittingPolicyBase {
+public:
+ RemarkEmittingPolicyAll();
+
+ void reportRemark(const detail::Remark &remark) override {
+ assert(reportImpl && "reportImpl is not set");
+ reportImpl(remark);
+ }
+ void finalize() override {}
+};
+
+/// Policy that emits only the final version of each remark.
+class RemarkEmittingPolicyFinal : public detail::RemarkEmittingPolicyBase {
+private:
+ /// Remarks postponed until finalization. The user can intercept them for
+ /// custom processing via a registered callback; otherwise they are reported
+ /// on engine destruction.
+ llvm::DenseSet<detail::Remark> postponedRemarks;
+
+public:
+ RemarkEmittingPolicyFinal();
+
+ void reportRemark(const detail::Remark &remark) override {
+ postponedRemarks.erase(remark);
+ postponedRemarks.insert(remark);
+ }
+
+ void finalize() override {
+ assert(reportImpl && "reportImpl is not set");
+ for (auto &remark : postponedRemarks) {
+ if (reportImpl)
+ reportImpl(remark);
+ }
+ }
+};
+
 /// Create a Reason with llvm::formatv formatting.
 template <class... Ts>
 inline detail::LazyTextBuild reason(const char *fmt, Ts &&...ts) {
@@ -505,16 +575,72 @@ inline detail::InFlightRemark analysis(Location loc, RemarkOpts opts) {
 /// Setup remarks for the context. This function will enable the remark engine
 /// and set the streamer to be used for optimization remarks. The remark
-/// categories are used to filter the remarks that will be emitted by the remark
-/// engine. If a category is not specified, it will not be emitted. If
+/// categories are used to filter the remarks that will be emitted by the
+/// remark engine. If a category is not specified, it will not be emitted. If
 /// `printAsEmitRemarks` is true, the remarks will be printed as
 /// mlir::emitRemarks. 'streamer' must inherit from MLIRRemarkStreamerBase and
 /// will be used to stream the remarks.
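A hedged sketch of wiring a policy through the entry point declared just below; `ctx`, `streamer`, and `cats` are assumed to be in scope:

// Hedged sketch: enable remarks with the "final" policy so only the last
// version of each remark is reported when the engine shuts down.
auto policy = std::make_unique<remark::RemarkEmittingPolicyFinal>();
if (failed(remark::enableOptimizationRemarks(
        ctx, std::move(streamer), std::move(policy), cats,
        /*printAsEmitRemarks=*/true)))
  return failure();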
LogicalResult enableOptimizationRemarks( MLIRContext &ctx, std::unique_ptr<remark::detail::MLIRRemarkStreamerBase> streamer, + std::unique_ptr<remark::detail::RemarkEmittingPolicyBase> + remarkEmittingPolicy, const remark::RemarkCategories &cats, bool printAsEmitRemarks = false); } // namespace mlir::remark +// DenseMapInfo specialization for Remark +namespace llvm { +template <> +struct DenseMapInfo<mlir::remark::detail::Remark> { + static constexpr StringRef kEmptyKey = "<EMPTY_KEY>"; + static constexpr StringRef kTombstoneKey = "<TOMBSTONE_KEY>"; + + /// Helper to provide a static dummy context for sentinel keys. + static mlir::MLIRContext *getStaticDummyContext() { + static mlir::MLIRContext dummyContext; + return &dummyContext; + } + + /// Create an empty remark + static inline mlir::remark::detail::Remark getEmptyKey() { + return mlir::remark::detail::Remark( + mlir::remark::RemarkKind::RemarkUnknown, mlir::DiagnosticSeverity::Note, + mlir::UnknownLoc::get(getStaticDummyContext()), + mlir::remark::RemarkOpts::name(kEmptyKey)); + } + + /// Create a dead remark + static inline mlir::remark::detail::Remark getTombstoneKey() { + return mlir::remark::detail::Remark( + mlir::remark::RemarkKind::RemarkUnknown, mlir::DiagnosticSeverity::Note, + mlir::UnknownLoc::get(getStaticDummyContext()), + mlir::remark::RemarkOpts::name(kTombstoneKey)); + } + + /// Compute the hash value of the remark + static unsigned getHashValue(const mlir::remark::detail::Remark &remark) { + return llvm::hash_combine( + remark.getLocation().getAsOpaquePointer(), + llvm::hash_value(remark.getRemarkName()), + llvm::hash_value(remark.getCombinedCategoryName())); + } + + static bool isEqual(const mlir::remark::detail::Remark &lhs, + const mlir::remark::detail::Remark &rhs) { + // Check for empty/tombstone keys first + if (lhs.getRemarkName() == kEmptyKey || + lhs.getRemarkName() == kTombstoneKey || + rhs.getRemarkName() == kEmptyKey || + rhs.getRemarkName() == kTombstoneKey) { + return lhs.getRemarkName() == rhs.getRemarkName(); + } + + // For regular remarks, compare key identifying fields + return lhs.getLocation() == rhs.getLocation() && + lhs.getRemarkName() == rhs.getRemarkName() && + lhs.getCombinedCategoryName() == rhs.getCombinedCategoryName(); + } +}; +} // namespace llvm #endif // MLIR_IR_REMARKS_H diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt index a5feb59..72ed046 100644 --- a/mlir/include/mlir/Interfaces/CMakeLists.txt +++ b/mlir/include/mlir/Interfaces/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_interface(DestinationStyleOpInterface) add_mlir_interface(FunctionInterfaces) add_mlir_interface(IndexingMapOpInterface) add_mlir_interface(InferIntRangeInterface) +add_mlir_interface(InferStridedMetadataInterface) add_mlir_interface(InferTypeOpInterface) add_mlir_interface(LoopLikeInterface) add_mlir_interface(MemOpInterfaces) diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h index 0e107e8..a6de3d1 100644 --- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h +++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h @@ -117,7 +117,8 @@ public: IntegerValueRange(ConstantIntRanges value) : value(std::move(value)) {} /// Create an integer value range lattice value. 
- IntegerValueRange(std::optional<ConstantIntRanges> value = std::nullopt) + explicit IntegerValueRange( + std::optional<ConstantIntRanges> value = std::nullopt) : value(std::move(value)) {} /// Whether the range is uninitialized. This happens when the state hasn't @@ -167,6 +168,15 @@ using SetIntRangeFn = using SetIntLatticeFn = llvm::function_ref<void(Value, const IntegerValueRange &)>; +/// Helper callback type to get the integer range of a value. +using GetIntRangeFn = function_ref<IntegerValueRange(Value)>; + +/// Helper function to collect the integer range values of an array of op fold +/// results. +SmallVector<IntegerValueRange> getIntValueRanges(ArrayRef<OpFoldResult> values, + GetIntRangeFn getIntRange, + int32_t indexBitwidth); + class InferIntRangeInterface; namespace intrange::detail { diff --git a/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h new file mode 100644 index 0000000..0c572e0 --- /dev/null +++ b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h @@ -0,0 +1,145 @@ +//===- InferStridedMetadataInterface.h - Strided Metadata Inference -C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions of the strided metadata inference interface +// defined in `InferStridedMetadataInterface.td` +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H +#define MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H + +#include "mlir/Interfaces/InferIntRangeInterface.h" + +namespace mlir { +/// A class that represents the strided metadata range information, including +/// offsets, sizes, and strides as integer ranges. +class StridedMetadataRange { +public: + /// Default constructor creates uninitialized ranges. + StridedMetadataRange() = default; + + /// Returns a ranked strided metadata range. + static StridedMetadataRange + getRanked(SmallVectorImpl<ConstantIntRanges> &&offsets, + SmallVectorImpl<ConstantIntRanges> &&sizes, + SmallVectorImpl<ConstantIntRanges> &&strides) { + return StridedMetadataRange(std::move(offsets), std::move(sizes), + std::move(strides)); + } + + /// Returns a strided metadata range with maximum ranges. + static StridedMetadataRange getMaxRanges(int32_t indexBitwidth, + int32_t offsetsRank, + int32_t sizeRank, + int32_t stridedRank) { + return StridedMetadataRange( + SmallVector<ConstantIntRanges>( + offsetsRank, ConstantIntRanges::maxRange(indexBitwidth)), + SmallVector<ConstantIntRanges>( + sizeRank, ConstantIntRanges::maxRange(indexBitwidth)), + SmallVector<ConstantIntRanges>( + stridedRank, ConstantIntRanges::maxRange(indexBitwidth))); + } + + static StridedMetadataRange getMaxRanges(int32_t indexBitwidth, + int32_t rank) { + return getMaxRanges(indexBitwidth, 1, rank, rank); + } + + /// Returns whether the metadata is uninitialized. + bool isUninitialized() const { return !offsets.has_value(); } + + /// Get the offsets range. + ArrayRef<ConstantIntRanges> getOffsets() const { + return offsets ? *offsets : ArrayRef<ConstantIntRanges>(); + } + MutableArrayRef<ConstantIntRanges> getOffsets() { + return offsets ? *offsets : MutableArrayRef<ConstantIntRanges>(); + } + + /// Get the sizes ranges. 
+ ArrayRef<ConstantIntRanges> getSizes() const { return sizes; }
+ MutableArrayRef<ConstantIntRanges> getSizes() { return sizes; }
+
+ /// Get the strides ranges.
+ ArrayRef<ConstantIntRanges> getStrides() const { return strides; }
+ MutableArrayRef<ConstantIntRanges> getStrides() { return strides; }
+
+ /// Compare two strided metadata ranges.
+ bool operator==(const StridedMetadataRange &other) const {
+ return offsets == other.offsets && sizes == other.sizes &&
+ strides == other.strides;
+ }
+
+ /// Print the strided metadata range.
+ void print(raw_ostream &os) const;
+
+ /// Join two strided metadata ranges, by taking the element-wise union of the
+ /// metadata.
+ static StridedMetadataRange join(const StridedMetadataRange &lhs,
+ const StridedMetadataRange &rhs) {
+ if (lhs.isUninitialized())
+ return rhs;
+ if (rhs.isUninitialized())
+ return lhs;
+
+ // Helper function to compute the range union of constant ranges.
+ auto rangeUnion =
+ +[](const std::tuple<ConstantIntRanges, ConstantIntRanges> &lhsRhs)
+ -> ConstantIntRanges {
+ return std::get<0>(lhsRhs).rangeUnion(std::get<1>(lhsRhs));
+ };
+
+ // Get the elementwise range union. Note that `zip_equal` will assert if
+ // the sizes are not equal.
+ SmallVector<ConstantIntRanges> offsets = llvm::map_to_vector(
+ llvm::zip_equal(*lhs.offsets, *rhs.offsets), rangeUnion);
+ SmallVector<ConstantIntRanges> sizes =
+ llvm::map_to_vector(llvm::zip_equal(lhs.sizes, rhs.sizes), rangeUnion);
+ SmallVector<ConstantIntRanges> strides = llvm::map_to_vector(
+ llvm::zip_equal(lhs.strides, rhs.strides), rangeUnion);
+
+ // Return the joined metadata.
+ return StridedMetadataRange(std::move(offsets), std::move(sizes),
+ std::move(strides));
+ }
+
+private:
+ /// Create a strided metadata range with the given offsets, sizes, and
+ /// strides.
+ StridedMetadataRange(SmallVectorImpl<ConstantIntRanges> &&offsets,
+ SmallVectorImpl<ConstantIntRanges> &&sizes,
+ SmallVectorImpl<ConstantIntRanges> &&strides)
+ : offsets(std::move(offsets)), sizes(std::move(sizes)),
+ strides(std::move(strides)) {}
+
+ /// The offsets range.
+ std::optional<SmallVector<ConstantIntRanges>> offsets;
+
+ /// The sizes ranges.
+ SmallVector<ConstantIntRanges> sizes;
+
+ /// The strides ranges.
+ SmallVector<ConstantIntRanges> strides;
+};
+
+/// Print the strided metadata to `os`.
+inline raw_ostream &operator<<(raw_ostream &os,
+ const StridedMetadataRange &range) {
+ range.print(os);
+ return os;
+}
+
+/// Callback function type for setting the strided metadata of a value.
+using SetStridedMetadataRangeFn =
+ function_ref<void(Value, const StridedMetadataRange &)>;
+} // end namespace mlir
+
+#include "mlir/Interfaces/InferStridedMetadataInterface.h.inc"
+
+#endif // MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H
diff --git a/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td
new file mode 100644
index 0000000..ee5b094
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td
@@ -0,0 +1,45 @@
+//===- InferStridedMetadataInterface.td - Strided MD Inference -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the interface for strided metadata range analysis +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE +#define MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE + +include "mlir/IR/OpBase.td" + +def InferStridedMetadataOpInterface : + OpInterface<"InferStridedMetadataOpInterface"> { + let description = [{ + Allows operations to participate in strided metadata analysis by providing + methods that allow them to specify bounds on offsets, sizes, and strides + of their result(s) given bounds on their input(s) if known. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Infer the strided metadata bounds on the results of this op given + the bounds on its operands. + For each result value or block argument of interest, the method should + call `setMetadata` with that `Value` as an argument. + The `operands` parameter contains the strided metadata ranges for all the + operands of the operation in order. + The `getIntRange` callback is provided for obtaining the int-range + analysis result for a given value. + }], + "void", "inferStridedMetadataRanges", + (ins "::llvm::ArrayRef<::mlir::StridedMetadataRange>":$operands, + "::mlir::GetIntRangeFn":$getIntRange, + "::mlir::SetStridedMetadataRangeFn":$setMetadata, + "int32_t":$indexBitwidth)> + ]; +} +#endif // MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE diff --git a/mlir/include/mlir/Remark/RemarkStreamer.h b/mlir/include/mlir/Remark/RemarkStreamer.h index 170d6b4..19a70fa 100644 --- a/mlir/include/mlir/Remark/RemarkStreamer.h +++ b/mlir/include/mlir/Remark/RemarkStreamer.h @@ -45,6 +45,7 @@ namespace mlir::remark { /// mlir::emitRemarks. LogicalResult enableOptimizationRemarksWithLLVMStreamer( MLIRContext &ctx, StringRef filePath, llvm::remarks::Format fmt, + std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy, const RemarkCategories &cat, bool printAsEmitRemarks = false); } // namespace mlir::remark diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h index 0fbe15f..b739438 100644 --- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h +++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h @@ -44,6 +44,11 @@ enum class RemarkFormat { REMARK_FORMAT_BITSTREAM, }; +enum class RemarkPolicy { + REMARK_POLICY_ALL, + REMARK_POLICY_FINAL, +}; + /// Configuration options for the mlir-opt tool. /// This is intended to help building tools like mlir-opt by collecting the /// supported options. @@ -242,6 +247,8 @@ public: /// Set the reproducer output filename RemarkFormat getRemarkFormat() const { return remarkFormatFlag; } + /// Set the remark policy to use. + RemarkPolicy getRemarkPolicy() const { return remarkPolicyFlag; } /// Set the remark format to use. std::string getRemarksAllFilter() const { return remarksAllFilterFlag; } /// Set the remark output file. 
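A driver built on this configuration might map the new flag onto a policy object along these lines (a hedged sketch; the switch itself is not part of this patch, and `config` is an assumed MainConfig-like object):

// Hedged sketch: translate the mlir-opt remark-policy flag into a policy.
std::unique_ptr<remark::detail::RemarkEmittingPolicyBase> policy;
switch (config.getRemarkPolicy()) {
case RemarkPolicy::REMARK_POLICY_ALL:
  policy = std::make_unique<remark::RemarkEmittingPolicyAll>();
  break;
case RemarkPolicy::REMARK_POLICY_FINAL:
  policy = std::make_unique<remark::RemarkEmittingPolicyFinal>();
  break;
}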
@@ -265,6 +272,8 @@ protected:
 /// Remark format
 RemarkFormat remarkFormatFlag = RemarkFormat::REMARK_FORMAT_STDOUT;
+ /// Remark policy
+ RemarkPolicy remarkPolicyFlag = RemarkPolicy::REMARK_POLICY_ALL;
 /// Remark file to output to
 std::string remarksOutputFileFlag = "";
 /// Remark filters
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index 609cb34..db10ebc 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -40,6 +40,7 @@ add_mlir_library(MLIRAnalysis
 DataFlow/IntegerRangeAnalysis.cpp
 DataFlow/LivenessAnalysis.cpp
 DataFlow/SparseAnalysis.cpp
+ DataFlow/StridedMetadataRangeAnalysis.cpp
 ADDITIONAL_HEADER_DIRS
 ${MLIR_MAIN_INCLUDE_DIR}/mlir/Analysis
@@ -53,6 +54,7 @@ add_mlir_library(MLIRAnalysis
 MLIRDataLayoutInterfaces
 MLIRFunctionInterfaces
 MLIRInferIntRangeInterface
+ MLIRInferStridedMetadataInterface
 MLIRInferTypeOpInterface
 MLIRLoopLikeInterface
 MLIRPresburger
diff --git a/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp
new file mode 100644
index 0000000..01c9daf
--- /dev/null
+++ b/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp
@@ -0,0 +1,127 @@
+//===- StridedMetadataRangeAnalysis.cpp - Strided metadata analysis -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the dataflow analysis class for strided metadata range
+// inference, which propagates bounds on the offsets, sizes, and strides of
+// memref values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Support/DebugStringHelper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
+
+#define DEBUG_TYPE "strided-metadata-range-analysis"
+
+using namespace mlir;
+using namespace mlir::dataflow;
+
+/// Get the entry state for a value. For any value that is not a ranked memref,
+/// this function sets the metadata to a top state with no offsets, sizes, or
+/// strides. For `memref` types, this function will use the metadata in the
+/// type to try to deduce as much information as possible.
+static StridedMetadataRange getEntryStateImpl(Value v, int32_t indexBitwidth) {
+ // TODO: generalize this method with a type interface.
+ auto mTy = dyn_cast<BaseMemRefType>(v.getType());
+
+ // If not a memref or it's unranked, don't infer any metadata.
+ if (!mTy || !mTy.hasRank())
+ return StridedMetadataRange::getMaxRanges(indexBitwidth, 0, 0, 0);
+
+ // Get the top state.
+ auto metadata =
+ StridedMetadataRange::getMaxRanges(indexBitwidth, mTy.getRank());
+
+ // Compute the offset and strides.
+ int64_t offset;
+ SmallVector<int64_t> strides;
+ if (failed(cast<MemRefType>(mTy).getStridesAndOffset(strides, offset)))
+ return metadata;
+
+ // Refine the metadata if we know it from the type.
+ if (!ShapedType::isDynamic(offset)) { + metadata.getOffsets()[0] = + ConstantIntRanges::constant(APInt(indexBitwidth, offset)); + } + for (auto &&[size, range] : + llvm::zip_equal(mTy.getShape(), metadata.getSizes())) { + if (ShapedType::isDynamic(size)) + continue; + range = ConstantIntRanges::constant(APInt(indexBitwidth, size)); + } + for (auto &&[stride, range] : + llvm::zip_equal(strides, metadata.getStrides())) { + if (ShapedType::isDynamic(stride)) + continue; + range = ConstantIntRanges::constant(APInt(indexBitwidth, stride)); + } + + return metadata; +} + +StridedMetadataRangeAnalysis::StridedMetadataRangeAnalysis( + DataFlowSolver &solver, int32_t indexBitwidth) + : SparseForwardDataFlowAnalysis(solver), indexBitwidth(indexBitwidth) { + assert(indexBitwidth > 0 && "invalid bitwidth"); +} + +void StridedMetadataRangeAnalysis::setToEntryState( + StridedMetadataRangeLattice *lattice) { + propagateIfChanged(lattice, lattice->join(getEntryStateImpl( + lattice->getAnchor(), indexBitwidth))); +} + +LogicalResult StridedMetadataRangeAnalysis::visitOperation( + Operation *op, ArrayRef<const StridedMetadataRangeLattice *> operands, + ArrayRef<StridedMetadataRangeLattice *> results) { + auto inferrable = dyn_cast<InferStridedMetadataOpInterface>(op); + + // Bail if we cannot reason about the op. + if (!inferrable) { + setAllToEntryStates(results); + return success(); + } + + LDBG() << "Inferring metadata for: " + << OpWithFlags(op, OpPrintingFlags().skipRegions()); + + // Helper function to retrieve int range values. + auto getIntRange = [&](Value value) -> IntegerValueRange { + auto lattice = getOrCreateFor<IntegerValueRangeLattice>( + getProgramPointAfter(op), value); + return lattice ? lattice->getValue() : IntegerValueRange(); + }; + + // Convert the arguments lattices to a vector. + SmallVector<StridedMetadataRange> argRanges = llvm::map_to_vector( + operands, [](const StridedMetadataRangeLattice *lattice) { + return lattice->getValue(); + }); + + // Callback to set metadata on a result. + auto joinCallback = [&](Value v, const StridedMetadataRange &md) { + auto result = cast<OpResult>(v); + assert(llvm::is_contained(op->getResults(), result)); + LDBG() << "- Inferred metadata: " << md; + StridedMetadataRangeLattice *lattice = results[result.getResultNumber()]; + ChangeResult changed = lattice->join(md); + LDBG() << "- Joined metadata: " << lattice->getValue(); + propagateIfChanged(lattice, changed); + }; + + // Infer the metadata. 
+ inferrable.inferStridedMetadataRanges(argRanges, getIntRange, joinCallback,
+ indexBitwidth);
+ return success();
+}
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
index 624519f..70faa71 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
@@ -64,12 +64,13 @@ mlir::bufferization::dropEquivalentBufferResults(ModuleOp module) {
 module.walk([&](func::CallOp callOp) {
 if (func::FuncOp calledFunc =
 dyn_cast_or_null<func::FuncOp>(callOp.resolveCallable())) {
- callerMap[calledFunc].insert(callOp);
+ if (!calledFunc.isPublic() && !calledFunc.isExternal())
+ callerMap[calledFunc].insert(callOp);
 }
 });
 for (auto funcOp : module.getOps<func::FuncOp>()) {
- if (funcOp.isExternal())
+ if (funcOp.isExternal() || funcOp.isPublic())
 continue;
 func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
 // TODO: Support functions with multiple blocks.
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index c477c6c..dcc1ef9 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -315,7 +315,8 @@ bool mlir::linalg::detail::isContractionBody(
 Value yielded = getSourceSkipUnary(terminator->getOperand(0));
 Operation *reductionOp = yielded.getDefiningOp();
- if (reductionOp->getNumResults() != 1 || reductionOp->getNumOperands() != 2) {
+ if (!reductionOp || reductionOp->getNumResults() != 1 ||
+ reductionOp->getNumOperands() != 2) {
 errs << "expected reduction op to be binary";
 return false;
 }
diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
index e25a012..1382c7ac 100644
--- a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
@@ -5,7 +5,7 @@ add_mlir_dialect_library(MLIRMemRefDialect
 ValueBoundsOpInterfaceImpl.cpp
 ADDITIONAL_HEADER_DIRS
- ${PROJECT_SOURCE_DIR}/inlude/mlir/Dialect/MemRefDialect
+ ${PROJECT_SOURCE_DIR}/include/mlir/Dialect/MemRef/IR
 DEPENDS
 MLIRMemRefOpsIncGen
@@ -18,6 +18,7 @@ add_mlir_dialect_library(MLIRMemRefDialect
 MLIRDialectUtils
 MLIRInferIntRangeCommon
 MLIRInferIntRangeInterface
+ MLIRInferStridedMetadataInterface
 MLIRInferTypeOpInterface
 MLIRIR
 MLIRMemOpInterfaces
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index e9bdcda..507597b 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -3437,6 +3437,65 @@ SubViewOp::bubbleDownCasts(OpBuilder &builder) {
 return bubbleDownCastsPassthroughOpImpl(*this, builder, getSourceMutable());
}
+
+void SubViewOp::inferStridedMetadataRanges(
+ ArrayRef<StridedMetadataRange> ranges, GetIntRangeFn getIntRange,
+ SetStridedMetadataRangeFn setMetadata, int32_t indexBitwidth) {
+ auto isUninitialized =
+ +[](IntegerValueRange range) { return range.isUninitialized(); };
+
+ // Bail early if any of the operands' metadata is not ready:
+ SmallVector<IntegerValueRange> offsetOperands =
+ getIntValueRanges(getMixedOffsets(), getIntRange, indexBitwidth);
+ if (llvm::any_of(offsetOperands, isUninitialized))
+ return;
+
+ SmallVector<IntegerValueRange> sizeOperands =
+ getIntValueRanges(getMixedSizes(), getIntRange, indexBitwidth);
+ if (llvm::any_of(sizeOperands, isUninitialized))
+ return;
+
+ SmallVector<IntegerValueRange>
stridesOperands = + getIntValueRanges(getMixedStrides(), getIntRange, indexBitwidth); + if (llvm::any_of(stridesOperands, isUninitialized)) + return; + + StridedMetadataRange sourceRange = + ranges[getSourceMutable().getOperandNumber()]; + if (sourceRange.isUninitialized()) + return; + + ArrayRef<ConstantIntRanges> srcStrides = sourceRange.getStrides(); + + // Get the dropped dims. + llvm::SmallBitVector droppedDims = getDroppedDims(); + + // Compute the new offset, strides and sizes. + ConstantIntRanges offset = sourceRange.getOffsets()[0]; + SmallVector<ConstantIntRanges> strides, sizes; + + for (size_t i = 0, e = droppedDims.size(); i < e; ++i) { + bool dropped = droppedDims.test(i); + // Compute the new offset. + ConstantIntRanges off = + intrange::inferMul({offsetOperands[i].getValue(), srcStrides[i]}); + offset = intrange::inferAdd({offset, off}); + + // Skip dropped dimensions. + if (dropped) + continue; + // Multiply the strides. + strides.push_back( + intrange::inferMul({stridesOperands[i].getValue(), srcStrides[i]})); + // Get the sizes. + sizes.push_back(sizeOperands[i].getValue()); + } + + setMetadata(getResult(), + StridedMetadataRange::getRanked( + SmallVector<ConstantIntRanges>({std::move(offset)}), + std::move(sizes), std::move(strides))); +} + //===----------------------------------------------------------------------===// // TransposeOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 6564a4e..642ced9 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -17,6 +17,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/SymbolTable.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallSet.h" @@ -74,14 +75,16 @@ struct MemRefPointerLikeModel } mlir::Value genAllocate(Type pointer, OpBuilder &builder, Location loc, - StringRef varName, Type varType, - Value originalVar) const { + StringRef varName, Type varType, Value originalVar, + bool &needsFree) const { auto memrefTy = cast<MemRefType>(pointer); // Check if this is a static memref (all dimensions are known) - if yes // then we can generate an alloca operation. - if (memrefTy.hasStaticShape()) + if (memrefTy.hasStaticShape()) { + needsFree = false; // alloca doesn't need deallocation return memref::AllocaOp::create(builder, loc, memrefTy).getResult(); + } // For dynamic memrefs, extract sizes from the original variable if // provided. Otherwise they cannot be handled. @@ -99,6 +102,7 @@ struct MemRefPointerLikeModel // Note: We only add dynamic sizes to the dynamicSizes array // Static dimensions are handled automatically by AllocOp } + needsFree = true; // alloc needs deallocation return memref::AllocOp::create(builder, loc, memrefTy, dynamicSizes) .getResult(); } @@ -108,10 +112,14 @@ struct MemRefPointerLikeModel } bool genFree(Type pointer, OpBuilder &builder, Location loc, - TypedValue<PointerLikeType> varPtr, Type varType) const { - if (auto memrefValue = dyn_cast<TypedValue<MemRefType>>(varPtr)) { + TypedValue<PointerLikeType> varToFree, Value allocRes, + Type varType) const { + if (auto memrefValue = dyn_cast<TypedValue<MemRefType>>(varToFree)) { + // Use allocRes if provided to determine the allocation type + Value valueToInspect = allocRes ? 
allocRes : memrefValue; + // Walk through casts to find the original allocation - Value currentValue = memrefValue; + Value currentValue = valueToInspect; Operation *originalAlloc = nullptr; // Follow the chain of operations to find the original allocation @@ -150,7 +158,7 @@ struct MemRefPointerLikeModel return true; } if (isa<memref::AllocOp>(originalAlloc)) { - // This is an alloc - generate dealloc + // This is an alloc - generate dealloc on varToFree memref::DeallocOp::create(builder, loc, memrefValue); return true; } @@ -1003,6 +1011,142 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> { } }; +//===----------------------------------------------------------------------===// +// Recipe Region Helpers +//===----------------------------------------------------------------------===// + +/// Create and populate an init region for privatization recipes. +/// Returns the init block on success, or nullptr on failure. +/// Sets needsFree to indicate if the allocated memory requires deallocation. +static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc, + Type varType, StringRef varName, + ValueRange bounds, + bool &needsFree) { + // Create init block with arguments: original value + bounds + SmallVector<Type> argTypes{varType}; + SmallVector<Location> argLocs{loc}; + for (Value bound : bounds) { + argTypes.push_back(bound.getType()); + argLocs.push_back(loc); + } + + auto initBlock = std::make_unique<Block>(); + initBlock->addArguments(argTypes, argLocs); + builder.setInsertionPointToStart(initBlock.get()); + + Value privatizedValue; + + // Get the block argument that represents the original variable + Value blockArgVar = initBlock->getArgument(0); + + // Generate init region body based on variable type + if (isa<MappableType>(varType)) { + auto mappableTy = cast<MappableType>(varType); + auto typedVar = cast<TypedValue<MappableType>>(blockArgVar); + privatizedValue = mappableTy.generatePrivateInit( + builder, loc, typedVar, varName, bounds, {}, needsFree); + if (!privatizedValue) + return nullptr; + } else { + assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType"); + auto pointerLikeTy = cast<PointerLikeType>(varType); + // Use PointerLikeType's allocation API with the block argument + privatizedValue = pointerLikeTy.genAllocate(builder, loc, varName, varType, + blockArgVar, needsFree); + if (!privatizedValue) + return nullptr; + } + + // Add yield operation to init block + acc::YieldOp::create(builder, loc, privatizedValue); + + return initBlock; +} + +/// Create and populate a copy region for firstprivate recipes. +/// Returns the copy block on success, or nullptr on failure. +/// TODO: Handle MappableType - it does not yet have a copy API. +static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc, + Type varType, + ValueRange bounds) { + // Create copy block with arguments: original value + privatized value + + // bounds + SmallVector<Type> copyArgTypes{varType, varType}; + SmallVector<Location> copyArgLocs{loc, loc}; + for (Value bound : bounds) { + copyArgTypes.push_back(bound.getType()); + copyArgLocs.push_back(loc); + } + + auto copyBlock = std::make_unique<Block>(); + copyBlock->addArguments(copyArgTypes, copyArgLocs); + builder.setInsertionPointToStart(copyBlock.get()); + + bool isMappable = isa<MappableType>(varType); + bool isPointerLike = isa<PointerLikeType>(varType); + // TODO: Handle MappableType - it does not yet have a copy API. 
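The `genAllocate`/`genFree` changes above pair up into a simple protocol. A minimal sketch of a caller, assuming the surrounding values (`pointerLikeTy`, `builder`, `loc`, `varType`, `originalVar`) already exist; the helper name is ours, and the call shapes follow the interface changes in this patch:

```cpp
// Sketch of the needsFree protocol (all local names are placeholders).
static LogicalResult allocateAndFree(PointerLikeType pointerLikeTy,
                                     OpBuilder &builder, Location loc,
                                     Type varType, Value originalVar) {
  bool needsFree = false;
  Value priv = pointerLikeTy.genAllocate(builder, loc, "test_alloc", varType,
                                         originalVar, needsFree);
  if (!priv)
    return failure();
  // genAllocate reports false for alloca-style storage and true for
  // heap-style allocations that require a matching free.
  if (needsFree) {
    auto toFree = cast<TypedValue<PointerLikeType>>(priv);
    // Passing the allocation result lets genFree walk through casts back to
    // the defining alloc when choosing the dealloc form.
    if (!pointerLikeTy.genFree(builder, loc, toFree, /*allocRes=*/priv,
                               varType))
      return failure();
  }
  return success();
}
```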
+  // Otherwise, for now just fall back to pointer-like behavior.
+  if (isMappable && !isPointerLike)
+    return nullptr;
+
+  // Generate copy region body based on variable type
+  if (isPointerLike) {
+    auto pointerLikeTy = cast<PointerLikeType>(varType);
+    Value originalArg = copyBlock->getArgument(0);
+    Value privatizedArg = copyBlock->getArgument(1);
+
+    // Generate copy operation using PointerLikeType interface
+    if (!pointerLikeTy.genCopy(
+            builder, loc, cast<TypedValue<PointerLikeType>>(privatizedArg),
+            cast<TypedValue<PointerLikeType>>(originalArg), varType))
+      return nullptr;
+  }
+
+  // Add terminator to copy block
+  acc::TerminatorOp::create(builder, loc);
+
+  return copyBlock;
+}
+
+/// Create and populate a destroy region for privatization recipes.
+/// Returns the destroy block on success, or nullptr if not needed.
+static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder,
+                                                  Location loc, Type varType,
+                                                  Value allocRes,
+                                                  ValueRange bounds) {
+  // Create destroy block with arguments: original value + privatized value +
+  // bounds
+  SmallVector<Type> destroyArgTypes{varType, varType};
+  SmallVector<Location> destroyArgLocs{loc, loc};
+  for (Value bound : bounds) {
+    destroyArgTypes.push_back(bound.getType());
+    destroyArgLocs.push_back(loc);
+  }
+
+  auto destroyBlock = std::make_unique<Block>();
+  destroyBlock->addArguments(destroyArgTypes, destroyArgLocs);
+  builder.setInsertionPointToStart(destroyBlock.get());
+
+  bool isMappable = isa<MappableType>(varType);
+  bool isPointerLike = isa<PointerLikeType>(varType);
+  // TODO: Handle MappableType - it does not yet have a deallocation API.
+  // Otherwise, for now just fall back to pointer-like behavior.
+  if (isMappable && !isPointerLike)
+    return nullptr;
+
+  assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
+  auto pointerLikeTy = cast<PointerLikeType>(varType);
+  auto privatizedArg =
+      cast<TypedValue<PointerLikeType>>(destroyBlock->getArgument(1));
+  // Pass allocRes to help determine the allocation type
+  if (!pointerLikeTy.genFree(builder, loc, privatizedArg, allocRes, varType))
+    return nullptr;
+
+  acc::TerminatorOp::create(builder, loc);
+
+  return destroyBlock;
+}
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -1050,6 +1194,55 @@ LogicalResult acc::PrivateRecipeOp::verifyRegions() {
   return success();
 }
 
+std::optional<PrivateRecipeOp>
+PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
+                                   StringRef recipeName, Type varType,
+                                   StringRef varName, ValueRange bounds) {
+  // First, validate that we can handle this variable type
+  bool isMappable = isa<MappableType>(varType);
+  bool isPointerLike = isa<PointerLikeType>(varType);
+
+  // Unsupported type
+  if (!isMappable && !isPointerLike)
+    return std::nullopt;
+
+  // Create init and destroy blocks using shared helpers
+  OpBuilder::InsertionGuard guard(builder);
+
+  // Save the original insertion point for creating the recipe operation later
+  auto originalInsertionPoint = builder.saveInsertionPoint();
+
+  bool needsFree = false;
+  auto initBlock =
+      createInitRegion(builder, loc, varType, varName, bounds, needsFree);
+  if (!initBlock)
+    return std::nullopt;
+
+  // Only create destroy region if the allocation needs deallocation
+  std::unique_ptr<Block> destroyBlock;
+  if (needsFree) {
+    // Extract the allocated value from the init block's yield operation
+    auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+    Value allocRes = yieldOp.getOperand(0);
+
+    destroyBlock =
createDestroyRegion(builder, loc, varType, allocRes, bounds); + if (!destroyBlock) + return std::nullopt; + } + + // Now create the recipe operation at the original insertion point and attach + // the blocks + builder.restoreInsertionPoint(originalInsertionPoint); + auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType); + + // Move the blocks into the recipe's regions + recipe.getInitRegion().push_back(initBlock.release()); + if (destroyBlock) + recipe.getDestroyRegion().push_back(destroyBlock.release()); + + return recipe; +} + //===----------------------------------------------------------------------===// // FirstprivateRecipeOp //===----------------------------------------------------------------------===// @@ -1080,6 +1273,60 @@ LogicalResult acc::FirstprivateRecipeOp::verifyRegions() { return success(); } +std::optional<FirstprivateRecipeOp> +FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc, + StringRef recipeName, Type varType, + StringRef varName, ValueRange bounds) { + // First, validate that we can handle this variable type + bool isMappable = isa<MappableType>(varType); + bool isPointerLike = isa<PointerLikeType>(varType); + + // Unsupported type + if (!isMappable && !isPointerLike) + return std::nullopt; + + // Create init, copy, and destroy blocks using shared helpers + OpBuilder::InsertionGuard guard(builder); + + // Save the original insertion point for creating the recipe operation later + auto originalInsertionPoint = builder.saveInsertionPoint(); + + bool needsFree = false; + auto initBlock = + createInitRegion(builder, loc, varType, varName, bounds, needsFree); + if (!initBlock) + return std::nullopt; + + auto copyBlock = createCopyRegion(builder, loc, varType, bounds); + if (!copyBlock) + return std::nullopt; + + // Only create destroy region if the allocation needs deallocation + std::unique_ptr<Block> destroyBlock; + if (needsFree) { + // Extract the allocated value from the init block's yield operation + auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator()); + Value allocRes = yieldOp.getOperand(0); + + destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds); + if (!destroyBlock) + return std::nullopt; + } + + // Now create the recipe operation at the original insertion point and attach + // the blocks + builder.restoreInsertionPoint(originalInsertionPoint); + auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType); + + // Move the blocks into the recipe's regions + recipe.getInitRegion().push_back(initBlock.release()); + recipe.getCopyRegion().push_back(copyBlock.release()); + if (destroyBlock) + recipe.getDestroyRegion().push_back(destroyBlock.release()); + + return recipe; +} + //===----------------------------------------------------------------------===// // ReductionRecipeOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 1fa04ed..89b81cf 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -121,6 +121,11 @@ namespace mlir { class MLIRContextImpl { public: //===--------------------------------------------------------------------===// + // Remark + //===--------------------------------------------------------------------===// + std::unique_ptr<remark::detail::RemarkEngine> remarkEngine; + + //===--------------------------------------------------------------------===// // Debugging 
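Taken together, the recipe helpers let a caller materialize a fully populated recipe in one step. A minimal sketch, assuming a module-level insertion point and a memref variable type; the helper name is ours, and the recipe/variable names echo the new tests later in this patch:

```cpp
// Sketch: create both recipe kinds for a memref variable at module scope.
static LogicalResult buildRecipes(ModuleOp moduleOp, Location loc,
                                  MemRefType memrefTy) {
  OpBuilder builder(moduleOp.getBodyRegion());
  std::optional<acc::PrivateRecipeOp> priv =
      acc::PrivateRecipeOp::createAndPopulate(builder, loc, "private_scalar",
                                              memrefTy, "scalar",
                                              /*bounds=*/{});
  if (!priv)
    return failure(); // neither MappableType nor PointerLikeType
  // Firstprivate additionally gets a copy region; a destroy region is only
  // attached when the init region's allocation reports needsFree = true.
  std::optional<acc::FirstprivateRecipeOp> fpriv =
      acc::FirstprivateRecipeOp::createAndPopulate(
          builder, loc, "firstprivate_scalar", memrefTy, "scalar",
          /*bounds=*/{});
  return success(fpriv.has_value());
}
```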
//===--------------------------------------------------------------------===// @@ -135,11 +140,6 @@ public: DiagnosticEngine diagEngine; //===--------------------------------------------------------------------===// - // Remark - //===--------------------------------------------------------------------===// - std::unique_ptr<remark::detail::RemarkEngine> remarkEngine; - - //===--------------------------------------------------------------------===// // Options //===--------------------------------------------------------------------===// @@ -357,7 +357,10 @@ MLIRContext::MLIRContext(const DialectRegistry ®istry, Threading setting) impl->affineUniquer.registerParametricStorageType<IntegerSetStorage>(); } -MLIRContext::~MLIRContext() = default; +MLIRContext::~MLIRContext() { + // finalize remark engine before destroying anything else. + impl->remarkEngine.reset(); +} /// Copy the specified array of elements into memory managed by the provided /// bump pointer allocator. This assumes the elements are all PODs. diff --git a/mlir/lib/IR/Remarks.cpp b/mlir/lib/IR/Remarks.cpp index a55f61a..031eae2 100644 --- a/mlir/lib/IR/Remarks.cpp +++ b/mlir/lib/IR/Remarks.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/StringRef.h" using namespace mlir::remark::detail; - +using namespace mlir::remark; //------------------------------------------------------------------------------ // Remark //------------------------------------------------------------------------------ @@ -70,7 +70,7 @@ static void printArgs(llvm::raw_ostream &os, llvm::ArrayRef<Remark::Arg> args) { void Remark::print(llvm::raw_ostream &os, bool printLocation) const { // Header: [Type] pass:remarkName StringRef type = getRemarkTypeString(); - StringRef categoryName = getFullCategoryName(); + StringRef categoryName = getCombinedCategoryName(); StringRef name = remarkName; os << '[' << type << "] "; @@ -81,9 +81,10 @@ void Remark::print(llvm::raw_ostream &os, bool printLocation) const { os << "Function=" << getFunction() << " | "; if (printLocation) { - if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(getLocation())) + if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(getLocation())) { os << " @" << flc.getFilename() << ":" << flc.getLine() << ":" << flc.getColumn(); + } } printArgs(os, getArgs()); @@ -140,7 +141,7 @@ llvm::remarks::Remark Remark::generateRemark() const { r.RemarkType = getRemarkType(); r.RemarkName = getRemarkName(); // MLIR does not use passes; instead, it has categories and sub-categories. 
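Downstream of the engine changes, enabling remarks now also requires choosing an emitting policy. A minimal sketch, assuming a context `ctx`; the output file name is ours, the field layout of `RemarkCategories` follows the initialization used in MlirOptMain below, and the streamer entry point is the one extended in this patch:

```cpp
// Sketch: stream remarks to YAML, keeping only the final remark per location
// via RemarkEmittingPolicyFinal (RemarkEmittingPolicyAll reports everything).
remark::RemarkCategories cats{/*all=*/"category.*", /*passed=*/std::nullopt,
                              /*missed=*/std::nullopt,
                              /*analysis=*/std::nullopt,
                              /*failed=*/std::nullopt};
if (failed(remark::enableOptimizationRemarksWithLLVMStreamer(
        ctx, /*path=*/"remarks.yaml", llvm::remarks::Format::YAML,
        std::make_unique<remark::RemarkEmittingPolicyFinal>(), cats,
        /*printAsEmitRemarks=*/false)))
  return failure();
```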
- r.PassName = getFullCategoryName(); + r.PassName = getCombinedCategoryName(); r.FunctionName = getFunction(); r.Loc = locLambda(); for (const Remark::Arg &arg : getArgs()) { @@ -225,26 +226,42 @@ InFlightRemark RemarkEngine::emitOptimizationRemarkAnalysis(Location loc, // RemarkEngine //===----------------------------------------------------------------------===// -void RemarkEngine::report(const Remark &&remark) { +void RemarkEngine::reportImpl(const Remark &remark) { // Stream the remark - if (remarkStreamer) + if (remarkStreamer) { remarkStreamer->streamOptimizationRemark(remark); + } // Print using MLIR's diagnostic if (printAsEmitRemarks) emitRemark(remark.getLocation(), remark.getMsg()); } +void RemarkEngine::report(const Remark &&remark) { + if (remarkEmittingPolicy) + remarkEmittingPolicy->reportRemark(remark); +} + RemarkEngine::~RemarkEngine() { + if (remarkEmittingPolicy) + remarkEmittingPolicy->finalize(); + if (remarkStreamer) remarkStreamer->finalize(); } -llvm::LogicalResult -RemarkEngine::initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer, - std::string *errMsg) { - // If you need to validate categories/filters, do so here and set errMsg. +llvm::LogicalResult RemarkEngine::initialize( + std::unique_ptr<MLIRRemarkStreamerBase> streamer, + std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy, + std::string *errMsg) { + remarkStreamer = std::move(streamer); + + auto reportFunc = + std::bind(&RemarkEngine::reportImpl, this, std::placeholders::_1); + remarkEmittingPolicy->initialize(ReportFn(std::move(reportFunc))); + + this->remarkEmittingPolicy = std::move(remarkEmittingPolicy); return success(); } @@ -301,14 +318,15 @@ RemarkEngine::RemarkEngine(bool printAsEmitRemarks, } llvm::LogicalResult mlir::remark::enableOptimizationRemarks( - MLIRContext &ctx, - std::unique_ptr<remark::detail::MLIRRemarkStreamerBase> streamer, - const remark::RemarkCategories &cats, bool printAsEmitRemarks) { + MLIRContext &ctx, std::unique_ptr<detail::MLIRRemarkStreamerBase> streamer, + std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy, + const RemarkCategories &cats, bool printAsEmitRemarks) { auto engine = - std::make_unique<remark::detail::RemarkEngine>(printAsEmitRemarks, cats); + std::make_unique<detail::RemarkEngine>(printAsEmitRemarks, cats); std::string errMsg; - if (failed(engine->initialize(std::move(streamer), &errMsg))) { + if (failed(engine->initialize(std::move(streamer), + std::move(remarkEmittingPolicy), &errMsg))) { llvm::report_fatal_error( llvm::Twine("Failed to initialize remark engine. 
Error: ") + errMsg); } @@ -316,3 +334,12 @@ llvm::LogicalResult mlir::remark::enableOptimizationRemarks( return success(); } + +//===----------------------------------------------------------------------===// +// Remark emitting policies +//===----------------------------------------------------------------------===// + +namespace mlir::remark { +RemarkEmittingPolicyAll::RemarkEmittingPolicyAll() = default; +RemarkEmittingPolicyFinal::RemarkEmittingPolicyFinal() = default; +} // namespace mlir::remark diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt index 388de1c..f96af02 100644 --- a/mlir/lib/Interfaces/CMakeLists.txt +++ b/mlir/lib/Interfaces/CMakeLists.txt @@ -9,6 +9,7 @@ set(LLVM_OPTIONAL_SOURCES FunctionInterfaces.cpp IndexingMapOpInterface.cpp InferIntRangeInterface.cpp + InferStridedMetadataInterface.cpp InferTypeOpInterface.cpp LoopLikeInterface.cpp MemOpInterfaces.cpp @@ -64,6 +65,21 @@ add_mlir_library(MLIRFunctionInterfaces add_mlir_interface_library(IndexingMapOpInterface) add_mlir_interface_library(InferIntRangeInterface) + +add_mlir_library(MLIRInferStridedMetadataInterface + InferStridedMetadataInterface.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces + + DEPENDS + MLIRInferStridedMetadataInterfaceIncGen + + LINK_LIBS PUBLIC + MLIRInferIntRangeInterface + MLIRIR +) + add_mlir_interface_library(InferTypeOpInterface) add_mlir_library(MLIRLoopLikeInterface diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp index 9f3e97d..84fc9b8 100644 --- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp +++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp @@ -146,6 +146,25 @@ raw_ostream &mlir::operator<<(raw_ostream &os, const IntegerValueRange &range) { return os; } +SmallVector<IntegerValueRange> +mlir::getIntValueRanges(ArrayRef<OpFoldResult> values, + GetIntRangeFn getIntRange, int32_t indexBitwidth) { + SmallVector<IntegerValueRange> ranges; + ranges.reserve(values.size()); + for (OpFoldResult ofr : values) { + if (auto value = dyn_cast<Value>(ofr)) { + ranges.push_back(getIntRange(value)); + continue; + } + + // Create a constant range. + auto attr = cast<IntegerAttr>(cast<Attribute>(ofr)); + ranges.emplace_back(ConstantIntRanges::constant( + attr.getValue().sextOrTrunc(indexBitwidth))); + } + return ranges; +} + void mlir::intrange::detail::defaultInferResultRanges( InferIntRangeInterface interface, ArrayRef<IntegerValueRange> argRanges, SetIntLatticeFn setResultRanges) { diff --git a/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp b/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp new file mode 100644 index 0000000..483e9f1 --- /dev/null +++ b/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp @@ -0,0 +1,36 @@ +//===- InferStridedMetadataInterface.cpp - Strided md inference interface -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Interfaces/InferStridedMetadataInterface.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/TypeUtilities.h" +#include <optional> + +using namespace mlir; + +#include "mlir/Interfaces/InferStridedMetadataInterface.cpp.inc" + +void StridedMetadataRange::print(raw_ostream &os) const { + if (isUninitialized()) { + os << "strided_metadata<None>"; + return; + } + os << "strided_metadata<offset = ["; + llvm::interleaveComma(*offsets, os, [&](const ConstantIntRanges &range) { + os << "{" << range << "}"; + }); + os << "], sizes = ["; + llvm::interleaveComma(sizes, os, [&](const ConstantIntRanges &range) { + os << "{" << range << "}"; + }); + os << "], strides = ["; + llvm::interleaveComma(strides, os, [&](const ConstantIntRanges &range) { + os << "{" << range << "}"; + }); + os << "]>"; +} diff --git a/mlir/lib/Remark/RemarkStreamer.cpp b/mlir/lib/Remark/RemarkStreamer.cpp index d213a1a..bf36286 100644 --- a/mlir/lib/Remark/RemarkStreamer.cpp +++ b/mlir/lib/Remark/RemarkStreamer.cpp @@ -60,6 +60,7 @@ void LLVMRemarkStreamer::finalize() { namespace mlir::remark { LogicalResult enableOptimizationRemarksWithLLVMStreamer( MLIRContext &ctx, StringRef path, llvm::remarks::Format fmt, + std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy, const RemarkCategories &cat, bool printAsEmitRemarks) { FailureOr<std::unique_ptr<detail::MLIRRemarkStreamerBase>> sOr = @@ -67,7 +68,8 @@ LogicalResult enableOptimizationRemarksWithLLVMStreamer( if (failed(sOr)) return failure(); - return remark::enableOptimizationRemarks(ctx, std::move(*sOr), cat, + return remark::enableOptimizationRemarks(ctx, std::move(*sOr), + std::move(remarkEmittingPolicy), cat, printAsEmitRemarks); } diff --git a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp index 132be4e..51c6077 100644 --- a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp +++ b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp @@ -956,7 +956,7 @@ inline parsed_inst_t ExpressionParser::buildNumericOp( << ", type = " << ty << " ***"; auto tysToPop = SmallVector<Type, numOperands>(); tysToPop.resize(numOperands); - std::fill(tysToPop.begin(), tysToPop.end(), ty); + llvm::fill(tysToPop, ty); auto operands = popOperands(tysToPop); if (failed(operands)) return failure(); diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index 30fd384..9ef405d 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -37,6 +37,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/LogicalResult.h" #include "llvm/Support/ManagedStatic.h" @@ -226,6 +227,18 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig { "bitstream", "Print bitstream file")), llvm::cl::cat(remarkCategory)}; + static llvm::cl::opt<RemarkPolicy, /*ExternalStorage=*/true> remarkPolicy{ + "remark-policy", + llvm::cl::desc("Specify the policy for remark output."), + cl::location(remarkPolicyFlag), + llvm::cl::value_desc("format"), + llvm::cl::init(RemarkPolicy::REMARK_POLICY_ALL), + llvm::cl::values(clEnumValN(RemarkPolicy::REMARK_POLICY_ALL, "all", + "Print all remarks"), + clEnumValN(RemarkPolicy::REMARK_POLICY_FINAL, "final", + "Print final remarks")), + 
llvm::cl::cat(remarkCategory)}; + static cl::opt<std::string, /*ExternalStorage=*/true> remarksAll( "remarks-filter", cl::desc("Show all remarks: passed, missed, failed, analysis"), @@ -517,18 +530,28 @@ performActions(raw_ostream &os, return failure(); context->enableMultithreading(wasThreadingEnabled); - + // Set the remark categories and policy. remark::RemarkCategories cats{ config.getRemarksAllFilter(), config.getRemarksPassedFilter(), config.getRemarksMissedFilter(), config.getRemarksAnalyseFilter(), config.getRemarksFailedFilter()}; mlir::MLIRContext &ctx = *context; + // Helper to create the appropriate policy based on configuration + auto createPolicy = [&config]() + -> std::unique_ptr<mlir::remark::detail::RemarkEmittingPolicyBase> { + if (config.getRemarkPolicy() == RemarkPolicy::REMARK_POLICY_ALL) + return std::make_unique<mlir::remark::RemarkEmittingPolicyAll>(); + if (config.getRemarkPolicy() == RemarkPolicy::REMARK_POLICY_FINAL) + return std::make_unique<mlir::remark::RemarkEmittingPolicyFinal>(); + + llvm_unreachable("Invalid remark policy"); + }; switch (config.getRemarkFormat()) { case RemarkFormat::REMARK_FORMAT_STDOUT: if (failed(mlir::remark::enableOptimizationRemarks( - ctx, nullptr, cats, true /*printAsEmitRemarks*/))) + ctx, nullptr, createPolicy(), cats, true /*printAsEmitRemarks*/))) return failure(); break; @@ -537,7 +560,7 @@ performActions(raw_ostream &os, ? "mlir-remarks.yaml" : config.getRemarksOutputFile(); if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer( - ctx, file, llvm::remarks::Format::YAML, cats))) + ctx, file, llvm::remarks::Format::YAML, createPolicy(), cats))) return failure(); break; } @@ -547,7 +570,7 @@ performActions(raw_ostream &os, ? "mlir-remarks.bitstream" : config.getRemarksOutputFile(); if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer( - ctx, file, llvm::remarks::Format::Bitstream, cats))) + ctx, file, llvm::remarks::Format::Bitstream, createPolicy(), cats))) return failure(); break; } @@ -593,6 +616,12 @@ performActions(raw_ostream &os, AsmState asmState(op.get(), OpPrintingFlags(), /*locationMap=*/nullptr, &fallbackResourceMap); os << OpWithState(op.get(), asmState) << '\n'; + + // This is required if the remark policy is final. Otherwise, the remarks are + // not emitted. + if (remark::detail::RemarkEngine *engine = ctx.getRemarkEngine()) + engine->getRemarkEmittingPolicy()->finalize(); + return success(); } diff --git a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp index 111f58e..5f3b04a 100644 --- a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp @@ -66,7 +66,9 @@ size_t mlir::moveLoopInvariantCode( size_t numMoved = 0; for (Region *region : regions) { - LDBG() << "Original loop:\n" << *region->getParentOp(); + LDBG() << "Original loop:\n" + << OpWithFlags(region->getParentOp(), + OpPrintingFlags().skipRegions()); std::queue<Operation *> worklist; // Add top-level operations in the loop body to the worklist. 
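The LICM change in the next hunk replaces the hand-written hoisting predicate with `isPure`. A minimal sketch of why this is behavior-preserving, using the public helpers from SideEffectInterfaces (the wrapper function is ours):

```cpp
#include "mlir/Interfaces/SideEffectInterfaces.h"

// isPure(op) is defined as memory-effect freedom plus speculatability, so
// the rewritten callback hoists exactly the same set of operations.
static bool canHoist(Operation *op) {
  bool viaIsPure = isPure(op);
  assert(viaIsPure == (isMemoryEffectFree(op) && isSpeculatable(op)));
  return viaIsPure;
}
```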
@@ -90,7 +92,8 @@ size_t mlir::moveLoopInvariantCode( !canBeHoisted(op, definedOutside)) continue; - LDBG() << "Moving loop-invariant op: " << *op; + LDBG() << "Moving loop-invariant op: " + << OpWithFlags(op, OpPrintingFlags().skipRegions()); moveOutOfRegion(op, region); ++numMoved; @@ -111,9 +114,7 @@ size_t mlir::moveLoopInvariantCode(LoopLikeOpInterface loopLike) { [&](Value value, Region *) { return loopLike.isDefinedOutsideOfLoop(value); }, - [&](Operation *op, Region *) { - return isMemoryEffectFree(op) && isSpeculatable(op); - }, + [&](Operation *op, Region *) { return isPure(op); }, [&](Operation *op, Region *) { loopLike.moveOutOfLoop(op); }); } diff --git a/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir new file mode 100644 index 0000000..808c1c2 --- /dev/null +++ b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt -test-strided-metadata-range-analysis %s 2>&1 | FileCheck %s + +func.func @memref_subview(%arg0: memref<8x16x4xf32, strided<[64, 4, 1]>>, %arg1: memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>>, %arg2: memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>>, %arg3: index, %arg4: index, %arg5: index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %0 = test.with_bounds {smax = 13 : index, smin = 11 : index, umax = 13 : index, umin = 11 : index} : index + %1 = test.with_bounds {smax = 7 : index, smin = 5 : index, umax = 7 : index, umin = 5 : index} : index + + // Test subview with unknown sizes, and constant offsets and strides. + // CHECK: Op: %[[SV0:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [1, 1] signed : [1, 1]}] + // CHECK-SAME: sizes = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: strides = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [4, 4] signed : [4, 4]}, {unsigned : [1, 1] signed : [1, 1]}] + %subview = memref.subview %arg0[%c0, %c0, %c1] [%arg3, %arg4, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[64, 4, 1]>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + + // Test a subview of a subview, with bounded dynamic offsets. + // CHECK: Op: %[[SV1:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [346, 484] signed : [346, 484]}] + // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}] + // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}] + %subview_0 = memref.subview %subview[%1, %1, %1] [%c2, %c2, %c2] [%0, %0, %0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + + // Test a subview of a subview, with constant operands. 
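The `[346, 484]` offset bound expected for `%subview_0` above can be checked by hand against the inference rule in `SubViewOp::inferStridedMetadataRanges` (new offset = source offset + Σᵢ offᵢ·strideᵢ): `%subview` contributes a constant offset of 1 and strides (64, 4, 1), and each offset operand `%1` lies in [5, 7], so

```latex
\mathrm{offset}' = \mathrm{offset}_{\mathrm{src}} + \sum_i o_i s_i
                 = 1 + [5,7]\cdot(64 + 4 + 1) = 1 + 69\cdot[5,7] = [346,\,484].
```

The stride bounds fall out the same way: the step `%0` in [11, 13] times the source strides (64, 4, 1) gives [704, 832], [44, 52], and [11, 13].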
+  // CHECK: Op: %[[SV2:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [368, 510] signed : [368, 510]}]
+  // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+  // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+  %subview_1 = memref.subview %subview_0[%c0, %c0, %c2] [%c2, %c2, %c2] [%c1, %c1, %c1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+  // Test a rank-reducing subview.
+  // CHECK: Op: %[[SV3:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  // CHECK-SAME: sizes = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [16, 16] signed : [16, 16]}]
+  // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  %subview_2 = memref.subview %arg1[%arg4, %arg4, %arg4, %arg4, %arg4] [1, 64, 1, 16, 1] [%arg5, %arg5, %arg5, %arg5, %arg5] : memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>> to memref<64x16xf32, strided<[?, ?], offset: ?>>
+
+  // Test a subview of a rank-reducing subview.
+  // CHECK: Op: %[[SV4:.*]] = memref.subview
+  // CHECK-NEXT: result[0]: strided_metadata<
+  // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  // CHECK-SAME: sizes = [{unsigned : [5, 7] signed : [5, 7]}]
+  // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+  %subview_3 = memref.subview %subview_2[%c0, %0] [1, %1] [%c1, %c2] : memref<64x16xf32, strided<[?, ?], offset: ?>> to memref<?xf32, strided<[?], offset: ?>>
+
+  // Test a subview with mixed bounded and unbounded dynamic sizes.
+ // CHECK: Op: %[[SV5:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [32, 32] signed : [32, 32]}] + // CHECK-SAME: sizes = [{unsigned : [11, 13] signed : [11, 13]}, {unsigned : [5, 7] signed : [5, 7]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: strides = [{unsigned : [1, 1] signed : [1, 1]}, {unsigned : [64, 64] signed : [64, 64]}, {unsigned : [8, 8] signed : [8, 8]}] + %subview_4 = memref.subview %arg2[%c0, %c0, %c2] [%0, %1, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + return +} + +// CHECK: func.func @memref_subview +// CHECK: %[[A0:.*]]: memref<8x16x4xf32, strided<[64, 4, 1]>> +// CHECK: %[[SV0]] = memref.subview %[[A0]] +// CHECK-NEXT: %[[SV1]] = memref.subview +// CHECK-NEXT: %[[SV2]] = memref.subview +// CHECK-NEXT: %[[SV3]] = memref.subview +// CHECK-NEXT: %[[SV4]] = memref.subview +// CHECK-NEXT: %[[SV5]] = memref.subview diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir index c58b153..21b508e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -65,13 +65,13 @@ func.func @main(%t: tensor<?xf32>, %sz: index, %idx: index) -> (f32, f32) { // ----- -func.func @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> { +func.func private @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> { func.return %A : tensor<?xf32> } -// CHECK-LABEL: func @return_arg +// CHECK-LABEL: func private @return_arg // CHECK-SAME: %[[A:.*]]: memref<?xf32 // CHECK-NOT: return %[[A]] -// NO-DROP-LABEL: func @return_arg +// NO-DROP-LABEL: func private @return_arg // NO-DROP-SAME: %[[A:.*]]: memref<?xf32 // NO-DROP: return %[[A]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 6054a61..d5f834b 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -171,9 +171,9 @@ func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // Bufferization of a function that is reading and writing. %t0 is writable, so // no copy should be inserted. 
-// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { +func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { // CHECK-NOT: copy %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index @@ -186,9 +186,9 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { return %0, %1 : tensor<?xf32>, f32 } -// CHECK-LABEL: func @call_func_with_non_tensor_return( +// CHECK-LABEL: func private @call_func_with_non_tensor_return( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @call_func_with_non_tensor_return( +func.func private @call_func_with_non_tensor_return( %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) { // CHECK-NOT: alloc // CHECK-NOT: copy @@ -203,9 +203,9 @@ func.func @call_func_with_non_tensor_return( // Bufferization of a function that is reading and writing. %t0 is not writable, // so a copy is needed. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { +func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { // CHECK-NOT: copy %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index @@ -276,10 +276,10 @@ func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) { // This function does not read, just write. We need an alloc, but no copy. -// CHECK-LABEL: func @does_not_read( +// CHECK-LABEL: func private @does_not_read( // CHECK-NOT: alloc // CHECK-NOT: copy -func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> { +func.func private @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> { %f0 = arith.constant 0.0 : f32 %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32> return %r : tensor<?xf32> @@ -354,9 +354,9 @@ func.func @main() { // A write inside an scf.execute_region. An equivalent tensor is yielded. 
-// CHECK-LABEL: func @execute_region_test( +// CHECK-LABEL: func private @execute_region_test( // CHECK-SAME: %[[m1:.*]]: memref<?xf32 -func.func @execute_region_test(%t1 : tensor<?xf32>) +func.func private @execute_region_test(%t1 : tensor<?xf32>) -> (f32, tensor<?xf32>, f32) { %f1 = arith.constant 0.0 : f32 @@ -397,11 +397,11 @@ func.func @no_inline_execute_region_not_canonicalized() { // CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>) func.func private @some_external_func(tensor<?xf32>) -// CHECK: func @scf_for_with_tensor_insert_slice( +// CHECK: func private @scf_for_with_tensor_insert_slice( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @scf_for_with_tensor_insert_slice( +func.func private @scf_for_with_tensor_insert_slice( %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>, %lb : index, %ub : index, %step : index) -> (tensor<?xf32>, tensor<?xf32>) @@ -456,11 +456,11 @@ func.func @bar( // ----- -// CHECK: func @init_and_dot( +// CHECK: func private @init_and_dot( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>> -func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> { +func.func private @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> { // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32 %v0 = arith.constant 0.0 : f32 @@ -574,9 +574,9 @@ func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i // No alloc or copy inside of the loop. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> { +func.func private @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> { %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index // CHECK: memref.store %{{.*}}, %[[arg0]] diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 618ba34..66cae5c 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } { } -> tensor<1x1x4xf32> return } + + func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) { + %0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d2, d1)>, + affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>) + outs(%arg2 : tensor<128x128xi32>) { + ^bb0(%in: i32, %in_0: i32, %out: i32): + linalg.yield %out : i32 + } -> tensor<128x128xi32> + return + } } // ----- diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 9616a3e..1df15e8 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -10,10 +10,10 @@ // TODO: Some test cases from this file should be moved to other dialects. 
-// CHECK-LABEL: func @fill_inplace( +// CHECK-LABEL: func private @fill_inplace( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> -// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) { -func.func @fill_inplace( +// CHECK-NO-LAYOUT-MAP-LABEL: func private @fill_inplace(%{{.*}}: memref<?xf32>) { +func.func private @fill_inplace( %A : tensor<?xf32> {bufferization.writable = true}) -> tensor<?xf32> { @@ -56,10 +56,10 @@ func.func @not_inplace( // ----- -// CHECK-LABEL: func @not_inplace +// CHECK-LABEL: func private @not_inplace // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, strided<[?, ?], offset: ?>>) { -// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) { -func.func @not_inplace( +// CHECK-NO-LAYOUT-MAP-LABEL: func private @not_inplace(%{{.*}}: memref<?x?xf32>) { +func.func private @not_inplace( %A : tensor<?x?xf32> {bufferization.writable = true}) -> tensor<?x?xf32> { @@ -235,7 +235,7 @@ func.func @dominance_violation_bug_1( // ----- -func.func @gather_like( +func.func private @gather_like( %arg0 : tensor<?x?xf32> {bufferization.writable = false}, %arg1 : tensor<?xi32> {bufferization.writable = false}, %arg2 : tensor<?x?xf32> {bufferization.writable = true}) @@ -254,7 +254,7 @@ func.func @gather_like( } -> tensor<?x?xf32> return %0 : tensor<?x?xf32> } -// CHECK-LABEL: func @gather_like( +// CHECK-LABEL: func private @gather_like( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32, // CHECK-SAME: %[[ARG1:.+]]: memref<?xi32 // CHECK-SAME: %[[ARG2:.+]]: memref<?x?xf32 diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir new file mode 100644 index 0000000..35355c6 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir @@ -0,0 +1,102 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s + +// CHECK: acc.firstprivate.recipe @firstprivate_scalar : memref<f32> init { +// CHECK: ^bb0(%{{.*}}: memref<f32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32> +// CHECK: acc.yield %[[ALLOC]] : memref<f32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<f32>, %[[DST:.*]]: memref<f32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<f32> to memref<f32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar() { + %0 = memref.alloca() {test.var = "scalar"} : memref<f32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_static_2d : memref<10x20xf32> init { +// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<10x20xf32>, %[[DST:.*]]: memref<10x20xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x20xf32> to memref<10x20xf32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_static_2d() { + %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_dynamic_2d : memref<?x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>): +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32> +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32> +// CHECK: 
%[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<?x?xf32>, %[[DST:.*]]: memref<?x?xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<?x?xf32> to memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_dynamic_2d(%arg0: index, %arg1: index) { + %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_mixed_dims : memref<10x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>): +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<10x?xf32>, %[[DST:.*]]: memref<10x?xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x?xf32> to memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_mixed_dims(%arg0: index) { + %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_scalar_int : memref<i32> init { +// CHECK: ^bb0(%{{.*}}: memref<i32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32> +// CHECK: acc.yield %[[ALLOC]] : memref<i32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<i32>, %[[DST:.*]]: memref<i32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<i32> to memref<i32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar_int() { + %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32> + return +} + diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir new file mode 100644 index 0000000..8403ee8 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s + +// CHECK: acc.private.recipe @private_scalar : memref<f32> init { +// CHECK: ^bb0(%{{.*}}: memref<f32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32> +// CHECK: acc.yield %[[ALLOC]] : memref<f32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar() { + %0 = memref.alloca() {test.var = "scalar"} : memref<f32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_static_2d : memref<10x20xf32> init { +// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_static_2d() { + %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_dynamic_2d : memref<?x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>): +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32> +// 
CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32> +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_dynamic_2d(%arg0: index, %arg1: index) { + %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_mixed_dims : memref<10x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>): +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32> +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_mixed_dims(%arg0: index) { + %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_scalar_int : memref<i32> init { +// CHECK: ^bb0(%{{.*}}: memref<i32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32> +// CHECK: acc.yield %[[ALLOC]] : memref<i32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar_int() { + %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32> + return +} + diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index a1067ec..af09dc8 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -8,11 +8,11 @@ // Test bufferization using memref types that have no layout map. 
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null -// CHECK-LABEL: func @scf_for_yield_only( +// CHECK-LABEL: func private @scf_for_yield_only( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: ) -> memref<?xf32> { -func.func @scf_for_yield_only( +func.func private @scf_for_yield_only( %A : tensor<?xf32> {bufferization.writable = false}, %B : tensor<?xf32> {bufferization.writable = true}, %lb : index, %ub : index, %step : index) @@ -85,11 +85,11 @@ func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true}, // ----- -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +// CHECK-LABEL: func private @scf_for_with_tensor.insert_slice // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @scf_for_with_tensor.insert_slice( +func.func private @scf_for_with_tensor.insert_slice( %A : tensor<?xf32> {bufferization.writable = false}, %B : tensor<?xf32> {bufferization.writable = true}, %C : tensor<4xf32> {bufferization.writable = false}, @@ -471,11 +471,11 @@ func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>, // ----- -// CHECK-LABEL: func.func @parallel_insert_slice_no_conflict( +// CHECK-LABEL: func private @parallel_insert_slice_no_conflict( // CHECK-SAME: %[[idx:.*]]: index, %[[idx2:.*]]: index, // CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>, // CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}> -func.func @parallel_insert_slice_no_conflict( +func.func private @parallel_insert_slice_no_conflict( %idx: index, %idx2: index, %arg1: tensor<?xf32> {bufferization.writable = true}, diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir index 5f95da2..b6c72be 100644 --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -8,12 +8,12 @@ // Test bufferization using memref types that have no layout map. 
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null -// CHECK-LABEL: func @insert_slice_fun +// CHECK-LABEL: func private @insert_slice_fun // CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @insert_slice_fun( +func.func private @insert_slice_fun( %A0 : tensor<?xf32> {bufferization.writable = false}, %A1 : tensor<?xf32> {bufferization.writable = true}, %t0 : tensor<4xf32> {bufferization.writable = false}, @@ -331,12 +331,12 @@ func.func @dim_not_reading(%t: tensor<?xf32>, %f: f32, %pos: index) // ----- // CHECK: #[[$map:.*]] = affine_map<(d0) -> (d0 + 5)> -// CHECK-LABEL: func.func @cast_retains_buffer_layout( +// CHECK-LABEL: func.func private @cast_retains_buffer_layout( // CHECK-SAME: %[[t:.*]]: memref<?xf32, #[[$map]]>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> { // CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, #[[$map]]> to memref<10xf32, #[[$map]]> // CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, #[[$map]]> to memref<?xf32, strided<[1], offset: 7>> // CHECK: return %[[slice]] -func.func @cast_retains_buffer_layout( +func.func private @cast_retains_buffer_layout( %t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0 + 5)>}, %sz: index) @@ -353,12 +353,12 @@ func.func @cast_retains_buffer_layout( // ----- -// CHECK-LABEL: func.func @cast_retains_buffer_layout_strided( +// CHECK-LABEL: func private @cast_retains_buffer_layout_strided( // CHECK-SAME: %[[t:.*]]: memref<?xf32, strided<[1], offset: 5>>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> { // CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, strided<[1], offset: 5>> to memref<10xf32, strided<[1], offset: 5>> // CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 7>> // CHECK: return %[[slice]] -func.func @cast_retains_buffer_layout_strided( +func.func private @cast_retains_buffer_layout_strided( %t: tensor<?xf32> {bufferization.buffer_layout = strided<[1], offset: 5>}, %sz: index) diff --git a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir index cb16c37..b3154d4 100644 --- a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir +++ b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir @@ -3,7 +3,16 @@ // RUN: | FileCheck %s // CHECK: data = -// CHECK-RAW: [[[7.7, 0, 0], [7.7, 0, 0], [7.7, 0, 0]], [[0, 7.7, 0], [0, 7.7, 0], [0, 7.7, 0]], [[0, 0, 7.7], [0, 0, 7.7], [0, 0, 7.7]]] +// CHECK{LITERAL}: [[[7.7, 0, 0], +// CHECK{LITERAL}: [7.7, 0, 0], +// CHECK{LITERAL}: [7.7, 0, 0]], +// CHECK{LITERAL}: [[0, 7.7, 0], +// CHECK{LITERAL}: [0, 7.7, 0], +// CHECK{LITERAL}: [0, 7.7, 0]], +// CHECK{LITERAL}: [[0, 0, 7.7], +// CHECK{LITERAL}: [0, 0, 7.7], +// CHECK{LITERAL}: [0, 0, 7.7]]] + module attributes { gpu.container_module, spirv.target_env = #spirv.target_env< diff --git a/mlir/test/Pass/remark-final.mlir b/mlir/test/Pass/remark-final.mlir new file mode 100644 index 0000000..325271e --- /dev/null +++ b/mlir/test/Pass/remark-final.mlir @@ -0,0 +1,17 @@ +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final 
2>&1 | FileCheck %s +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final --remark-format=yaml --remarks-output-file=%t.yaml +// RUN: FileCheck --check-prefix=CHECK-YAML %s < %t.yaml +module @foo { + "test.op"() : () -> () + +} + +// CHECK-YAML-NOT: This is a test passed remark (should be dropped) +// CHECK-YAML-DAG: !Analysis +// CHECK-YAML-DAG: !Failure +// CHECK-YAML-DAG: !Passed + +// CHECK-NOT: This is a test passed remark (should be dropped) +// CHECK-DAG: remark: [Analysis] test-remark +// CHECK-DAG: remark: [Failure] test-remark | Category:category-2-failed +// CHECK-DAG: remark: [Passed] test-remark | Category:category-1-passed diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt index 9187998..c37671a 100644 --- a/mlir/test/lib/Analysis/CMakeLists.txt +++ b/mlir/test/lib/Analysis/CMakeLists.txt @@ -17,6 +17,7 @@ add_mlir_library(MLIRTestAnalysis DataFlow/TestDenseForwardDataFlowAnalysis.cpp DataFlow/TestLivenessAnalysis.cpp DataFlow/TestSparseBackwardDataFlowAnalysis.cpp + DataFlow/TestStridedMetadataRangeAnalysis.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp new file mode 100644 index 0000000..6ac09fd --- /dev/null +++ b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp @@ -0,0 +1,86 @@ +//===- TestStridedMetadataRangeAnalysis.cpp - Test strided md analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" +#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" +#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h" +#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h" +#include "mlir/Analysis/DataFlowFramework.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::dataflow; + +static void printAnalysisResults(DataFlowSolver &solver, Operation *op, + raw_ostream &os) { + // Collect the strided metadata of the op results. + SmallVector<std::pair<unsigned, const StridedMetadataRangeLattice *>> results; + for (OpResult result : op->getResults()) { + const auto *state = solver.lookupState<StridedMetadataRangeLattice>(result); + // Skip the result if it's uninitialized. + if (!state || state->getValue().isUninitialized()) + continue; + + // Skip the result if the range is empty. + const mlir::StridedMetadataRange &md = state->getValue(); + if (md.getOffsets().empty() && md.getSizes().empty() && + md.getStrides().empty()) + continue; + results.push_back({result.getResultNumber(), state}); + } + + // Early exit if there's no metadata to print. + if (results.empty()) + return; + + // Print the metadata. 
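+  // Illustrative shape of the output below (the op is hypothetical and the
+  // exact rendering of the range values comes from StridedMetadataRange's
+  // printer):
+  //   Op: %subview = memref.subview ...
+  //     result[0]: <offsets = [...], sizes = [...], strides = [...]>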
+ os << "Op: " << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n"; + for (auto [idx, state] : results) + os << " result[" << idx << "]: " << state->getValue() << "\n"; + os << "\n"; +} + +namespace { +struct TestStridedMetadataRangeAnalysisPass + : public PassWrapper<TestStridedMetadataRangeAnalysisPass, + OperationPass<>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( + TestStridedMetadataRangeAnalysisPass) + + StringRef getArgument() const override { + return "test-strided-metadata-range-analysis"; + } + void runOnOperation() override { + Operation *op = getOperation(); + + DataFlowSolver solver; + solver.load<DeadCodeAnalysis>(); + solver.load<SparseConstantPropagation>(); + solver.load<IntegerRangeAnalysis>(); + solver.load<StridedMetadataRangeAnalysis>(); + if (failed(solver.initializeAndRun(op))) + return signalPassFailure(); + + op->walk( + [&](Operation *op) { printAnalysisResults(solver, op, llvm::errs()); }); + } +}; +} // end anonymous namespace + +namespace mlir { +namespace test { +void registerTestStridedMetadataRangeAnalysisPass() { + PassRegistration<TestStridedMetadataRangeAnalysisPass>(); +} +} // end namespace test +} // end namespace mlir diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt index f84055d..1e59338 100644 --- a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_library(MLIROpenACCTestPasses TestOpenACC.cpp TestPointerLikeTypeInterface.cpp + TestRecipePopulate.cpp EXCLUDE_FROM_LIBMLIR ) diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp index 9886240..bea21b9 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp @@ -15,9 +15,13 @@ namespace test { // Forward declarations of individual test pass registration functions void registerTestPointerLikeTypeInterfacePass(); +void registerTestRecipePopulatePass(); // Unified registration function for all OpenACC tests -void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); } +void registerTestOpenACC() { + registerTestPointerLikeTypeInterfacePass(); + registerTestRecipePopulatePass(); +} } // namespace test } // namespace mlir diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp index 85f9283..027b0a1 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp @@ -196,13 +196,15 @@ void TestPointerLikeTypeInterfacePass::testGenAllocate( newBuilder.setInsertionPointAfter(op); // Call the genAllocate API + bool needsFree = false; Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc", - result.getType(), result); + result.getType(), result, needsFree); if (allocRes) { llvm::errs() << "Successfully generated alloc for operation: "; op->print(llvm::errs()); llvm::errs() << "\n"; + llvm::errs() << "\tneeds free: " << (needsFree ? 
"true" : "false") << "\n"; // Print all operations that were inserted for (Operation *insertedOp : tracker.insertedOps) { @@ -230,8 +232,8 @@ void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result, // Call the genFree API auto typedResult = cast<TypedValue<PointerLikeType>>(result); - bool success = - pointerType.genFree(newBuilder, loc, typedResult, result.getType()); + bool success = pointerType.genFree(newBuilder, loc, typedResult, result, + result.getType()); if (success) { llvm::errs() << "Successfully generated free for operation: "; diff --git a/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp new file mode 100644 index 0000000..35f092c --- /dev/null +++ b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp @@ -0,0 +1,110 @@ +//===- TestRecipePopulate.cpp - Test Recipe Population -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains test passes for testing the createAndPopulate methods +// of the recipe operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/CommandLine.h" + +using namespace mlir; +using namespace mlir::acc; + +namespace { + +struct TestRecipePopulatePass + : public PassWrapper<TestRecipePopulatePass, OperationPass<ModuleOp>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRecipePopulatePass) + + TestRecipePopulatePass() = default; + TestRecipePopulatePass(const TestRecipePopulatePass &pass) + : PassWrapper(pass) { + recipeType = pass.recipeType; + } + + Pass::Option<std::string> recipeType{ + *this, "recipe-type", + llvm::cl::desc("Recipe type: private or firstprivate"), + llvm::cl::init("private")}; + + StringRef getArgument() const override { return "test-acc-recipe-populate"; } + + StringRef getDescription() const override { + return "Test OpenACC recipe population"; + } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert<acc::OpenACCDialect>(); + registry.insert<arith::ArithDialect>(); + registry.insert<memref::MemRefDialect>(); + } +}; + +void TestRecipePopulatePass::runOnOperation() { + auto module = getOperation(); + OpBuilder builder(&getContext()); + + // Collect all test variables + SmallVector<std::tuple<Operation *, Value, std::string>> testVars; + + module.walk([&](Operation *op) { + if (auto varName = op->getAttrOfType<StringAttr>("test.var")) { + for (auto result : op->getResults()) { + testVars.push_back({op, result, varName.str()}); + } + } + }); + + // Generate recipes at module level + builder.setInsertionPoint(&module.getBodyRegion().front(), + module.getBodyRegion().front().begin()); + + for (auto [op, var, varName] : testVars) { + Location loc = op->getLoc(); + + std::string recipeName = recipeType.getValue() + "_" + varName; + ValueRange bounds; // No bounds for memref tests + + if (recipeType == "private") { + auto recipe = PrivateRecipeOp::createAndPopulate( + builder, loc, recipeName, var.getType(), 
varName, bounds); + + if (!recipe) { + op->emitError("Failed to create private recipe for ") << varName; + } + } else if (recipeType == "firstprivate") { + auto recipe = FirstprivateRecipeOp::createAndPopulate( + builder, loc, recipeName, var.getType(), varName, bounds); + + if (!recipe) { + op->emitError("Failed to create firstprivate recipe for ") << varName; + } + } + } +} + +} // namespace + +namespace mlir { +namespace test { + +void registerTestRecipePopulatePass() { + PassRegistration<TestRecipePopulatePass>(); +} + +} // namespace test +} // namespace mlir diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp index 3b25686..5ca2d1a 100644 --- a/mlir/test/lib/Pass/TestRemarksPass.cpp +++ b/mlir/test/lib/Pass/TestRemarksPass.cpp @@ -43,7 +43,12 @@ public: << remark::add("This is a test missed remark") << remark::reason("because we are testing the remark pipeline") << remark::suggest("try using the remark pipeline feature"); - + mlir::remark::passed( + loc, + remark::RemarkOpts::name("test-remark").category("category-1-passed")) + << remark::add("This is a test passed remark (should be dropped)") + << remark::reason("because we are testing the remark pipeline") + << remark::suggest("try using the remark pipeline feature"); mlir::remark::passed( loc, remark::RemarkOpts::name("test-remark").category("category-1-passed")) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 6432fae..8842180 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -151,6 +151,7 @@ void registerTestSliceAnalysisPass(); void registerTestSPIRVCPURunnerPipeline(); void registerTestSPIRVFuncSignatureConversion(); void registerTestSPIRVVectorUnrolling(); +void registerTestStridedMetadataRangeAnalysisPass(); void registerTestTensorCopyInsertionPass(); void registerTestTensorLikeAndBufferLikePass(); void registerTestTensorTransforms(); @@ -299,6 +300,7 @@ void registerTestPasses() { mlir::test::registerTestSPIRVCPURunnerPipeline(); mlir::test::registerTestSPIRVFuncSignatureConversion(); mlir::test::registerTestSPIRVVectorUnrolling(); + mlir::test::registerTestStridedMetadataRangeAnalysisPass(); mlir::test::registerTestTensorCopyInsertionPass(); mlir::test::registerTestTensorLikeAndBufferLikePass(); mlir::test::registerTestTensorTransforms(); diff --git a/mlir/unittests/IR/RemarkTest.cpp b/mlir/unittests/IR/RemarkTest.cpp index bcbda90..09c576c 100644 --- a/mlir/unittests/IR/RemarkTest.cpp +++ b/mlir/unittests/IR/RemarkTest.cpp @@ -53,10 +53,12 @@ TEST(Remark, TestOutputOptimizationRemark) { /*missed=*/categoryUnroll, /*analysis=*/categoryRegister, /*failed=*/categoryInliner}; - + std::unique_ptr<remark::RemarkEmittingPolicyAll> policy = + std::make_unique<remark::RemarkEmittingPolicyAll>(); LogicalResult isEnabled = mlir::remark::enableOptimizationRemarksWithLLVMStreamer( - context, yamlFile, llvm::remarks::Format::YAML, cats); + context, yamlFile, llvm::remarks::Format::YAML, std::move(policy), + cats); ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; // PASS: something succeeded @@ -202,9 +204,10 @@ TEST(Remark, TestOutputOptimizationRemarkDiagnostic) { /*missed=*/categoryUnroll, /*analysis=*/categoryRegister, /*failed=*/categoryUnroll}; - - LogicalResult isEnabled = - remark::enableOptimizationRemarks(context, nullptr, cats, true); + std::unique_ptr<remark::RemarkEmittingPolicyAll> policy = + std::make_unique<remark::RemarkEmittingPolicyAll>(); + LogicalResult isEnabled = 
remark::enableOptimizationRemarks( + context, nullptr, std::move(policy), cats, true); ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; @@ -282,8 +285,11 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) { /*analysis=*/std::nullopt, /*failed=*/categoryLoopunroll}; + std::unique_ptr<remark::RemarkEmittingPolicyAll> policy = + std::make_unique<remark::RemarkEmittingPolicyAll>(); LogicalResult isEnabled = remark::enableOptimizationRemarks( - context, std::make_unique<MyCustomStreamer>(), cats, true); + context, std::make_unique<MyCustomStreamer>(), std::move(policy), cats, + true); ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; // Remark 1: pass, category LoopUnroll @@ -311,4 +317,66 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) { EXPECT_NE(errOut.find(pass2Msg), std::string::npos); // printed EXPECT_EQ(errOut.find(pass3Msg), std::string::npos); // filtered out } + +TEST(Remark, TestRemarkFinal) { + testing::internal::CaptureStderr(); + const auto *pass1Msg = "I failed"; + const auto *pass2Msg = "I failed too"; + const auto *pass3Msg = "I succeeded"; + const auto *pass4Msg = "I succeeded too"; + + std::string categoryLoopunroll("LoopUnroll"); + + std::string seenMsg = ""; + + { + MLIRContext context; + Location loc = FileLineColLoc::get(&context, "test.cpp", 1, 5); + Location locOther = FileLineColLoc::get(&context, "test.cpp", 55, 5); + + // Setup the remark engine + mlir::remark::RemarkCategories cats{/*all=*/"", + /*passed=*/categoryLoopunroll, + /*missed=*/categoryLoopunroll, + /*analysis=*/categoryLoopunroll, + /*failed=*/categoryLoopunroll}; + + std::unique_ptr<remark::RemarkEmittingPolicyFinal> policy = + std::make_unique<remark::RemarkEmittingPolicyFinal>(); + LogicalResult isEnabled = remark::enableOptimizationRemarks( + context, std::make_unique<MyCustomStreamer>(), std::move(policy), cats, + true); + ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine"; + + // Remark 1: failure + remark::failed( + loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << pass1Msg; + + // Remark 2: failure + remark::missed( + loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << remark::reason(pass2Msg); + + // Remark 3: pass + remark::passed( + loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << pass3Msg; + + // Remark 4: pass + remark::passed( + locOther, + remark::RemarkOpts::name("Unroller").category(categoryLoopunroll)) + << pass4Msg; + } + + llvm::errs().flush(); + std::string errOut = ::testing::internal::GetCapturedStderr(); + + // Containment checks for messages. + EXPECT_EQ(errOut.find(pass1Msg), std::string::npos); // dropped + EXPECT_EQ(errOut.find(pass2Msg), std::string::npos); // dropped + EXPECT_NE(errOut.find(pass3Msg), std::string::npos); // shown + EXPECT_NE(errOut.find(pass4Msg), std::string::npos); // shown +} } // namespace diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py index f80a181..3712a6b 100755 --- a/mlir/utils/generate-test-checks.py +++ b/mlir/utils/generate-test-checks.py @@ -31,13 +31,16 @@ import argparse import os # Used to advertise this file's name ("autogenerated_note"). import re import sys +from collections import Counter ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by " ADVERT_END = """ -// The script is designed to make adding checks to -// a test case fast, it is *not* designed to be authoritative -// about what constitutes a good test! 
The CHECK should be -// minimized and named to reflect the test intent. +// This script is intended to make adding checks to a test case quick and easy. +// It is *not* authoritative about what constitutes a good test. After using the +// script, be sure to review and refine the generated checks. For example, +// CHECK lines should be minimized and named to reflect the test’s intent. +// For comprehensive guidelines, see: +// * https://mlir.llvm.org/getting_started/TestingGuide/ """ @@ -45,6 +48,9 @@ ADVERT_END = """ SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*" SSA_RE = re.compile(SSA_RE_STR) +# Regex matching an op name of the form `dialect.op_name` (e.g. `vector.transfer_read`); group 1 captures the op name without the dialect. +SSA_OP_NAME_RE = re.compile(r"\b(?:\s=\s[a-z_]+)[.]([a-z_]+)\b") + # Regex matching the left-hand side of an assignment SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*=' SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR) @@ -63,7 +69,12 @@ ATTR_DEF_RE = re.compile(ATTR_DEF_RE_STR) class VariableNamer: def __init__(self, variable_names): self.scopes = [] + # Counter for generic FileCheck names, e.g. VAL_#N self.name_counter = 0 + # Counters for FileCheck names derived from Op names, e.g. + # TRANSFER_READ_#N (based on `vector.transfer_read`). Note that there's a + # dedicated counter for every Op type present in the input. + self.op_name_counter = Counter() # Number of variable names to still generate in parent scope self.generate_in_parent_scope_left = 0 @@ -77,17 +88,29 @@ class VariableNamer: self.generate_in_parent_scope_left = n # Generate a substitution name for the given ssa value name. - def generate_name(self, source_variable_name, use_ssa_name): + def generate_name(self, source_variable_name, use_ssa_name, op_name=""): # Compute variable name - variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else '' - if variable_name == '': + variable_name = ( + self.variable_names.pop(0) if len(self.variable_names) > 0 else "" + ) + if variable_name == "": # If `use_ssa_name` is set, use the MLIR SSA value name to generate # a FileCheck substitution string. As FileCheck requires these # strings to start with a character, skip MLIR variables starting # with a digit (e.g. `%0`). + # + # The next fallback option is to use the op name, if the + # corresponding match succeeds. + # + # If neither works, use a generic name: `VAL_#N`. if use_ssa_name and source_variable_name[0].isalpha(): variable_name = source_variable_name.upper() + elif op_name != "": + variable_name = ( + op_name.upper() + "_" + str(self.op_name_counter[op_name]) + ) + self.op_name_counter[op_name] += 1 else: variable_name = "VAL_" + str(self.name_counter) self.name_counter += 1 @@ -123,6 +146,7 @@ class VariableNamer: def clear_names(self): self.name_counter = 0 self.used_variable_names = set() + self.op_name_counter.clear() class AttributeNamer: @@ -170,8 +194,12 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re # Process the rest that contained an SSA value name. for chunk in line_chunks: - m = SSA_RE.match(chunk) - ssa_name = m.group(0) if m is not None else '' + ssa = SSA_RE.match(chunk) + op_name_with_dialect = SSA_OP_NAME_RE.search(chunk) + ssa_name = ssa.group(0) if ssa is not None else "" + op_name = ( + op_name_with_dialect.group(1) if op_name_with_dialect is not None else "" + ) # Check if an existing variable exists for this name.
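+        # Illustrative example (hypothetical input): for a chunk produced by
+        # `%0 = vector.transfer_read ...`, ssa_name is "0" and op_name is
+        # "transfer_read", so generate_name below would fall back to a
+        # substitution named TRANSFER_READ_0 rather than a generic VAL_#N.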
variable = None @@ -185,7 +213,7 @@ output_line += "%[[" + variable + "]]" else: # Otherwise, generate a new variable. - variable = variable_namer.generate_name(ssa_name, use_ssa_name) + variable = variable_namer.generate_name(ssa_name, use_ssa_name, op_name) if strict_name_re: # Use stricter regexp for the variable name, if requested. # Greedy matching may cause issues with the generic '.*' diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td index ac27d85..b472236 100644 --- a/offload/liboffload/API/Common.td +++ b/offload/liboffload/API/Common.td @@ -140,9 +140,10 @@ def ol_dimensions_t : Struct { } def olInit : Function { - let desc = "Perform initialization of the Offload library and plugins"; + let desc = "Perform initialization of the Offload library"; let details = [ "This must be the first API call made by a user of the Offload library", + "The underlying platforms are lazily initialized on their first use", "Each call will increment an internal reference count that is decremented by `olShutDown`" ]; let params = []; diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index c549ae0..6d22fae 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -42,9 +42,7 @@ using namespace error; struct ol_platform_impl_t { ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin, ol_platform_backend_t BackendType) - : Plugin(std::move(Plugin)), BackendType(BackendType) {} - std::unique_ptr<GenericPluginTy> Plugin; - llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices; + : BackendType(BackendType), Plugin(std::move(Plugin)) {} ol_platform_backend_t BackendType; /// Complete all pending work for this platform and perform any needed @@ -53,6 +51,14 @@ struct ol_platform_impl_t { /// After calling this function, no liboffload functions should be called with /// this platform handle. llvm::Error destroy(); + + /// Initialize the associated plugin and devices. + llvm::Error init(); + + /// Direct access to the plugin; it may still be uninitialized when accessed here. + std::unique_ptr<GenericPluginTy> Plugin; + + llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices; }; // Handle type definitions. Ideally these would be 1:1 with the plugins, but @@ -130,6 +136,28 @@ llvm::Error ol_platform_impl_t::destroy() { return Result; } +llvm::Error ol_platform_impl_t::init() { + if (!Plugin) + return llvm::Error::success(); + + if (llvm::Error Err = Plugin->init()) + return Err; + + for (auto Id = 0, End = Plugin->getNumDevices(); Id != End; Id++) { + if (llvm::Error Err = Plugin->initDevice(Id)) + return Err; + + auto Device = &Plugin->getDevice(Id); + auto Info = Device->obtainInfoImpl(); + if (llvm::Error Err = Info.takeError()) + return Err; + Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this, + std::move(*Info))); + } + + return llvm::Error::success(); +} + struct ol_queue_impl_t { ol_queue_impl_t(__tgt_async_info *AsyncInfo, ol_device_handle_t Device) : AsyncInfo(AsyncInfo), Device(Device), Id(IdCounter++) {} @@ -207,15 +235,11 @@ struct OffloadContext { std::mutex AllocInfoMapMutex{}; // Partitioned list of memory base addresses.
Each element in this list is a // key in AllocInfoMap - llvm::SmallVector<void *> AllocBases{}; + SmallVector<void *> AllocBases{}; SmallVector<std::unique_ptr<ol_platform_impl_t>, 4> Platforms{}; + ol_device_handle_t HostDevice; size_t RefCount; - ol_device_handle_t HostDevice() { - // The host platform is always inserted last - return Platforms.back()->Devices[0].get(); - } - static OffloadContext &get() { assert(OffloadContextVal); return *OffloadContextVal; @@ -259,28 +283,21 @@ Error initPlugins(OffloadContext &Context) { } while (false); #include "Shared/Targets.def" - // Preemptively initialize all devices in the plugin + // Eagerly initialize all of the plugins and devices. We need to make sure + // that the platform is initialized at a consistent point to maintain the + // expected teardown order in the vendor libraries. for (auto &Platform : Context.Platforms) { - auto Err = Platform->Plugin->init(); - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - for (auto DevNum = 0; DevNum < Platform->Plugin->number_of_devices(); - DevNum++) { - if (Platform->Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) { - auto Device = &Platform->Plugin->getDevice(DevNum); - auto Info = Device->obtainInfoImpl(); - if (auto Err = Info.takeError()) - return Err; - Platform->Devices.emplace_back(std::make_unique<ol_device_impl_t>( - DevNum, Device, *Platform, std::move(*Info))); - } - } + if (Error Err = Platform->init()) + return Err; } - // Add the special host device + // Add the special host device. auto &HostPlatform = Context.Platforms.emplace_back( std::make_unique<ol_platform_impl_t>(nullptr, OL_PLATFORM_BACKEND_HOST)); - HostPlatform->Devices.emplace_back(std::make_unique<ol_device_impl_t>( - -1, nullptr, *HostPlatform, InfoTreeNode{})); + Context.HostDevice = HostPlatform->Devices + .emplace_back(std::make_unique<ol_device_impl_t>( + -1, nullptr, *HostPlatform, InfoTreeNode{})) + .get(); Context.TracingEnabled = std::getenv("OFFLOAD_TRACE"); Context.ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION"); @@ -312,16 +329,16 @@ Error olShutDown_impl() { if (--OffloadContext::get().RefCount != 0) return Error::success(); - llvm::Error Result = Error::success(); + Error Result = Error::success(); auto *OldContext = OffloadContextVal.exchange(nullptr); - for (auto &P : OldContext->Platforms) { + for (auto &Platform : OldContext->Platforms) { // Host plugin is nullptr and has no deinit - if (!P->Plugin || !P->Plugin->is_initialized()) + if (!Platform->Plugin || !Platform->Plugin->is_initialized()) continue; - if (auto Res = P->destroy()) - Result = llvm::joinErrors(std::move(Result), std::move(Res)); + if (auto Res = Platform->destroy()) + Result = joinErrors(std::move(Result), std::move(Res)); } delete OldContext; @@ -334,6 +351,8 @@ Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform, InfoWriter Info(PropSize, PropValue, PropSizeRet); bool IsHost = Platform->BackendType == OL_PLATFORM_BACKEND_HOST; + // Note that the plugin is potentially uninitialized here. It will need to be + // initialized once info that requires it is added. switch (PropName) { case OL_PLATFORM_INFO_NAME: return Info.writeString(IsHost ?
"Host" : Platform->Plugin->getName()); @@ -373,12 +392,12 @@ Error olGetPlatformInfoSize_impl(ol_platform_handle_t Platform, Error olGetDeviceInfoImplDetail(ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { - assert(Device != OffloadContext::get().HostDevice()); + assert(Device != OffloadContext::get().HostDevice); InfoWriter Info(PropSize, PropValue, PropSizeRet); auto makeError = [&](ErrorCode Code, StringRef Err) { std::string ErrBuffer; - llvm::raw_string_ostream(ErrBuffer) << PropName << ": " << Err; + raw_string_ostream(ErrBuffer) << PropName << ": " << Err; return Plugin::error(ErrorCode::UNIMPLEMENTED, ErrBuffer.c_str()); }; @@ -511,7 +530,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device, Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { - assert(Device == OffloadContext::get().HostDevice()); + assert(Device == OffloadContext::get().HostDevice); InfoWriter Info(PropSize, PropValue, PropSizeRet); constexpr auto uint32_max = std::numeric_limits<uint32_t>::max(); @@ -579,7 +598,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device, Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, void *PropValue) { - if (Device == OffloadContext::get().HostDevice()) + if (Device == OffloadContext::get().HostDevice) return olGetDeviceInfoImplDetailHost(Device, PropName, PropSize, PropValue, nullptr); return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue, @@ -588,7 +607,7 @@ Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName, Error olGetDeviceInfoSize_impl(ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet) { - if (Device == OffloadContext::get().HostDevice()) + if (Device == OffloadContext::get().HostDevice) return olGetDeviceInfoImplDetailHost(Device, PropName, 0, nullptr, PropSizeRet); return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet); @@ -598,7 +617,7 @@ Error olIterateDevices_impl(ol_device_iterate_cb_t Callback, void *UserData) { for (auto &Platform : OffloadContext::get().Platforms) { for (auto &Device : Platform->Devices) { if (!Callback(Device.get(), UserData)) { - break; + return Error::success(); } } } @@ -949,7 +968,7 @@ Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) { Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice, const void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size) { - auto Host = OffloadContext::get().HostDevice(); + auto Host = OffloadContext::get().HostDevice; if (DstDevice == Host && SrcDevice == Host) { if (!Queue) { std::memcpy(DstPtr, SrcPtr, Size); @@ -1138,7 +1157,7 @@ Error olGetSymbolInfoImplDetail(ol_symbol_handle_t Symbol, auto CheckKind = [&](ol_symbol_kind_t Required) { if (Symbol->Kind != Required) { std::string ErrBuffer; - llvm::raw_string_ostream(ErrBuffer) + raw_string_ostream(ErrBuffer) << PropName << ": Expected a symbol of Kind " << Required << " but given a symbol of Kind " << Symbol->Kind; return Plugin::error(ErrorCode::SYMBOL_KIND, ErrBuffer.c_str()); diff --git a/orc-rt/include/orc-rt/ExecutorAddress.h b/orc-rt/include/orc-rt/ExecutorAddress.h index cc7bbf5..6ec583f 100644 --- a/orc-rt/include/orc-rt/ExecutorAddress.h +++ b/orc-rt/include/orc-rt/ExecutorAddress.h @@ -204,6 +204,9 @@ struct ExecutorAddrRange { constexpr bool contains(ExecutorAddr Addr) const 
noexcept { return Start <= Addr && Addr < End; } + constexpr bool contains(const ExecutorAddrRange &Other) const noexcept { + return (Other.Start >= Start && Other.End <= End); + } constexpr bool overlaps(const ExecutorAddrRange &Other) const noexcept { return !(Other.End <= Start || End <= Other.Start); } diff --git a/orc-rt/unittests/ExecutorAddressTest.cpp b/orc-rt/unittests/ExecutorAddressTest.cpp index 98074a7..2e04901 100644 --- a/orc-rt/unittests/ExecutorAddressTest.cpp +++ b/orc-rt/unittests/ExecutorAddressTest.cpp @@ -97,10 +97,16 @@ TEST(ExecutorAddrTest, AddrRanges) { EXPECT_FALSE(R1.contains(A0)); EXPECT_FALSE(R1.contains(A2)); + EXPECT_TRUE(R3.contains(R0)); // True for singleton range at start. + EXPECT_TRUE(R3.contains(R1)); // True for singleton range at end. + EXPECT_FALSE(R3.contains(R2)); // False for non-overlapping singleton range. + EXPECT_FALSE(R3.contains(R4)); // False for overlapping, uncontained range. + EXPECT_FALSE(R1.overlaps(R0)); EXPECT_FALSE(R1.overlaps(R2)); EXPECT_TRUE(R1.overlaps(R3)); EXPECT_TRUE(R1.overlaps(R4)); + EXPECT_TRUE(R3.overlaps(R4)); } TEST(ExecutorAddrTest, Hashable) { diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 61c30b8..74d632b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1412,6 +1412,13 @@ td_library( ) td_library( + name = "InferStridedMetadataInterfaceTdFiles", + srcs = ["include/mlir/Interfaces/InferStridedMetadataInterface.td"], + includes = ["include"], + deps = [":OpBaseTdFiles"], +) + +td_library( name = "InferTypeOpInterfaceTdFiles", srcs = ["include/mlir/Interfaces/InferTypeOpInterface.td"], includes = ["include"], @@ -4159,6 +4166,7 @@ cc_library( ":MathToLibm", ":MathToROCDL", ":MathToSPIRV", + ":MathToXeVM", ":MemRefToEmitC", ":MemRefToLLVM", ":MemRefToSPIRV", @@ -5227,6 +5235,7 @@ cc_library( ":IR", ":InferTypeOpInterface", ":InliningUtils", + ":LLVMDialectBytecodeIncGen", ":LLVMDialectInterfaceIncGen", ":LLVMIntrinsicOpsIncGen", ":LLVMOpsIncGen", @@ -5679,6 +5688,16 @@ td_library( ], ) +td_library( + name = "LLVMDialectBytecodeTdFiles", + srcs = ["include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td"], + includes = ["include"], + deps = [ + ":BytecodeTdFiles", + ":LLVMOpsTdFiles", + ], +) + cc_library( name = "GPUCommonTransforms", hdrs = [ @@ -6032,6 +6051,17 @@ gentbl_cc_library( ) gentbl_cc_library( + name = "LLVMDialectBytecodeIncGen", + tbl_outs = {"include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.cpp.inc": [ + "-gen-bytecode", + "-bytecode-dialect=LLVM", + ]}, + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td", + deps = [":LLVMDialectBytecodeTdFiles"], +) + +gentbl_cc_library( name = "LLVMTypesIncGen", tbl_outs = { "include/mlir/Dialect/LLVMIR/LLVMTypes.h.inc": [ @@ -7027,6 +7057,33 @@ cc_library( ) cc_library( + name = "MathToXeVM", + srcs = glob([ + "lib/Conversion/MathToXeVM/*.cpp", + ]), + hdrs = glob([ + "include/mlir/Conversion/MathToXeVM/*.h", + ]), + includes = [ + "include", + "lib/Conversion/MathToXeVM", + ], + deps = [ + ":ArithAttrToLLVMConversion", + ":ArithDialect", + ":ConversionPassIncGen", + ":IR", + ":LLVMCommonConversion", + ":LLVMDialect", + ":MathDialect", + ":Pass", + ":Transforms", + ":XeVMDialect", + "//llvm:Support", + ], +) + +cc_library( name = "FuncToEmitC", srcs = glob([ "lib/Conversion/FuncToEmitC/*.cpp", ]), @@ -7600,6 +7657,30 @@ cc_library( ], ) +gentbl_cc_library( + name =
"InferStridedMetadataInterfaceIncGen", + tbl_outs = { + "include/mlir/Interfaces/InferStridedMetadataInterface.h.inc": ["-gen-op-interface-decls"], + "include/mlir/Interfaces/InferStridedMetadataInterface.cpp.inc": ["-gen-op-interface-defs"], + }, + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Interfaces/InferStridedMetadataInterface.td", + deps = [":InferStridedMetadataInterfaceTdFiles"], +) + +cc_library( + name = "InferStridedMetadataInterface", + srcs = ["lib/Interfaces/InferStridedMetadataInterface.cpp"], + hdrs = ["include/mlir/Interfaces/InferStridedMetadataInterface.h"], + includes = ["include"], + deps = [ + ":IR", + ":InferIntRangeInterface", + ":InferStridedMetadataInterfaceIncGen", + "//llvm:Support", + ], +) + td_library( name = "DataLayoutInterfacesTdFiles", srcs = ["include/mlir/Interfaces/DataLayoutInterfaces.td"], @@ -8528,9 +8609,11 @@ cc_library( ":CallOpInterfaces", ":ControlFlowInterfaces", ":DataLayoutInterfaces", + ":DialectUtils", ":FunctionInterfaces", ":IR", ":InferIntRangeInterface", + ":InferStridedMetadataInterface", ":LoopLikeInterface", ":Pass", ":SideEffectInterfaces", @@ -12452,6 +12535,7 @@ cc_library( ":IR", ":InferIntRangeCommon", ":InferIntRangeInterface", + ":InferStridedMetadataInterface", ":InferTypeOpInterface", ":InliningUtils", ":Pass", @@ -12673,6 +12757,7 @@ td_library( ":ArithOpsTdFiles", ":CastInterfacesTdFiles", ":ControlFlowInterfacesTdFiles", + ":InferStridedMetadataInterfaceTdFiles", ":MemOpInterfacesTdFiles", ":MemorySlotInterfacesTdFiles", ":OpBaseTdFiles", @@ -12763,6 +12848,7 @@ cc_library( ":IR", ":InferIntRangeCommon", ":InferIntRangeInterface", + ":InferStridedMetadataInterface", ":InferTypeOpInterface", ":InliningUtils", ":MemOpInterfaces", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel index b00e8f2..d8fcb53 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel @@ -1,4 +1,5 @@ load("//llvm:lit_test.bzl", "lit_test") +load("//llvm:targets.bzl", "llvm_targets") licenses(["notice"]) @@ -15,6 +16,9 @@ package(default_visibility = ["//visibility:public"]) ) for src in glob( include = ["**/*.mlir"], - exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"], + exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"] + ( + # MathToXeVM needs SPIRV; see MathToXeVM/lit.local.cfg + ["MathToXeVM/**"] if "SPIRV" not in llvm_targets else [] + ), ) ] |