diff options
214 files changed, 7725 insertions, 4168 deletions
diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index c49fd1d..efdc42d 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1096,8 +1096,8 @@ clang:openmp: - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety* - - clang/lib/Analysis/LifetimeSafety* + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** - clang/unittests/Analysis/LifetimeSafety* - clang/test/Sema/*lifetime-safety* - clang/test/Sema/*lifetime-analysis* diff --git a/.github/workflows/release-binaries-save-stage/action.yml b/.github/workflows/release-binaries-save-stage/action.yml deleted file mode 100644 index 84ccf98..0000000 --- a/.github/workflows/release-binaries-save-stage/action.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Save Stage -description: >- - Upload the source and binary directories from a build stage so that they - can be re-used in the next stage. This action is used to the release - binaries workflow into multiple stages to avoid the 6 hour timeout on - the GitHub hosted runners. -inputs: - build-prefix: - description: "Directory containing the build directory." - required: true - type: 'string' - -permissions: - contents: read - -runs: - using: "composite" - steps: - # We need to create an archive of the build directory, because it has too - # many files to upload. - - name: Package Build and Source Directories - shell: bash - run: | - # Remove .git/config to avoid leaking GITHUB_TOKEN stored there. - # See https://unit42.paloaltonetworks.com/github-repo-artifacts-leak-tokens/ - rm -Rf .git/config - # Windows does not support symlinks, so we need to dereference them. - tar --exclude build/ ${{ (runner.os == 'Windows' && '-h') || '' }} -c . | zstd -T0 -c > ../llvm-project.tar.zst - mv ../llvm-project.tar.zst . 
- tar -C ${{ inputs.build-prefix }} -c build/ | zstd -T0 -c > build.tar.zst - - - name: Upload Stage 1 Source - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: ${{ runner.os }}-${{ runner.arch }}-${{ github.job }}-source - path: llvm-project.tar.zst - retention-days: 2 - - - name: Upload Stage 1 Build Dir - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: ${{ runner.os}}-${{ runner.arch }}-${{ github.job }}-build - path: build.tar.zst - retention-days: 2 diff --git a/.github/workflows/release-binaries-setup-stage/action.yml b/.github/workflows/release-binaries-setup-stage/action.yml deleted file mode 100644 index 475a25f..0000000 --- a/.github/workflows/release-binaries-setup-stage/action.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Setup Stage -description: >- - Setup the next stage of the release binaries workflow. This sets up the - environment correctly for a new stage of the release binaries workflow - and also restores the source and build directory from the previous stage. - -inputs: - previous-artifact: - description: >- - A unique descriptor for the artifact from the previous stage. This will - be used to construct the final artifact pattern, which is: - $RUNNER_OS-$RUNNER_ARCH-$PREVIOUS_ARTIFACT-* - required: false - type: 'string' - -outputs: - build-prefix: - description: "Directory containing the build directory." 
- value: ${{ steps.build-prefix.outputs.build-prefix }} - -runs: - using: "composite" - steps: - - name: Install Ninja - uses: llvm/actions/install-ninja@a1ea791b03c8e61f53a0e66f2f73db283aa0f01e # main - - - name: Setup Windows - if: startsWith(runner.os, 'Windows') - uses: llvm/actions/setup-windows@main - with: - arch: amd64 - - - name: Set Build Prefix - id: build-prefix - shell: bash - run: | - build_prefix=`pwd` - if [ "${{ runner.os }}" = "Linux" ]; then - sudo chown $USER:$USER /mnt/ - build_prefix=/mnt/ - fi - echo "build-prefix=$build_prefix" >> $GITHUB_OUTPUT - - - name: Download Previous Stage Artifact - if: ${{ inputs.previous-artifact }} - id: download - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 - with: - pattern: ${{ runner.os }}-${{ runner.arch }}-${{ inputs.previous-artifact }}-* - merge-multiple: true - - - name: Unpack Artifact - if: ${{ steps.download.outputs.download-path }} - shell: bash - run: | - tar --zstd -xf llvm-project.tar.zst - rm llvm-project.tar.zst - tar --zstd -C ${{ steps.build-prefix.outputs.build-prefix}} -xf build.tar.zst - rm build.tar.zst diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index cba48e4..83969b5 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -194,40 +194,30 @@ jobs: runs-on: ${{ needs.prepare.outputs.build-runs-on }} steps: - - name: Checkout Actions - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }} - sparse-checkout: | - .github/workflows/ - sparse-checkout-cone-mode: false - # Check out outside of working directory so the source checkout doesn't - # remove it. - path: workflows - - # actions/checkout does not support paths outside of the GITHUB_WORKSPACE. - # Also, anything that we put inside of GITHUB_WORKSPACE will be overwritten - # by future actions/checkout steps. 
Therefore, in order to checkout the - # latest actions from main, we need to first checkout out the actions inside of - # GITHUB_WORKSPACE (see previous step), then use actions/checkout to checkout - # the code being built and the move the actions from main back into GITHUB_WORKSPACE, - # becasue the uses on composite actions only reads workflows from inside GITHUB_WORKSPACE. - - shell: bash - run: mv workflows ../workflows-main - - name: Checkout LLVM uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: ref: ${{ needs.prepare.outputs.ref }} - - name: Copy main workflows - shell: bash - run: | - mv ../workflows-main . + - name: Install Ninja + uses: llvm/actions/install-ninja@a1ea791b03c8e61f53a0e66f2f73db283aa0f01e # main + + - name: Setup Windows + if: startsWith(runner.os, 'Windows') + uses: llvm/actions/setup-windows@main + with: + arch: amd64 - - name: Setup Stage + - name: Set Build Prefix id: setup-stage - uses: ./workflows-main/.github/workflows/release-binaries-setup-stage + shell: bash + run: | + build_prefix=`pwd` + if [ "${{ runner.os }}" = "Linux" ]; then + sudo chown $USER:$USER /mnt/ + build_prefix=/mnt/ + fi + echo "build-prefix=$build_prefix" >> $GITHUB_OUTPUT - name: Configure id: build @@ -258,17 +248,11 @@ jobs: path: | ${{ needs.prepare.outputs.release-binary-filename }} - # Clean up some build files to reduce size of artifact. - - name: Clean Up Build Directory - shell: bash + - name: Run Tests + # These almost always fail so don't let them fail the build and prevent the uploads. 
+ continue-on-error: true run: | - find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname ${{ needs.prepare.outputs.release-binary-filename }} -delete - find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname _CPack_Packages -prune -exec rm -r {} + - - - name: Save Stage - uses: ./workflows-main/.github/workflows/release-binaries-save-stage - with: - build-prefix: ${{ steps.setup-stage.outputs.build-prefix }} + ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-check-all upload-release-binaries: name: "Upload Release Binaries" @@ -327,31 +311,3 @@ jobs: --release ${{ needs.prepare.outputs.release-version }} \ upload \ --files ${{ needs.prepare.outputs.release-binary-filename }}* - - test-release: - name: "Test Release" - needs: - - prepare - - build-release-package - if: >- - github.repository_owner == 'llvm' - runs-on: ${{ needs.prepare.outputs.test-runs-on }} - steps: - - name: Checkout Actions - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }} - sparse-checkout: | - .github/workflows/ - sparse-checkout-cone-mode: false - path: workflows - - name: Setup Stage - id: setup-stage - uses: ./workflows/.github/workflows/release-binaries-setup-stage - with: - previous-artifact: build-release-package - - - name: Run Tests - shell: bash - run: | - ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-check-all diff --git a/clang-tools-extra/clang-doc/JSONGenerator.cpp b/clang-tools-extra/clang-doc/JSONGenerator.cpp index 26794a5..6fba211 100644 --- a/clang-tools-extra/clang-doc/JSONGenerator.cpp +++ b/clang-tools-extra/clang-doc/JSONGenerator.cpp @@ -582,11 +582,10 @@ static SmallString<16> determineFileName(Info *I, SmallString<128> &Path) { if (I->IT == InfoType::IT_record) { auto *RecordSymbolInfo = static_cast<SymbolInfo *>(I); FileName = RecordSymbolInfo->MangledName; - } else if (I->IT == InfoType::IT_namespace 
&& I->Name != "") - // Serialize the global namespace as index.json - FileName = I->Name; + } else if (I->USR == GlobalNamespaceID) + FileName = "index"; else - FileName = I->getFileBaseName(); + FileName = I->Name; sys::path::append(Path, FileName + ".json"); return FileName; } diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h index 2a75f89..d8c2b9c 100644 --- a/clang-tools-extra/clang-doc/Representation.h +++ b/clang-tools-extra/clang-doc/Representation.h @@ -30,6 +30,9 @@ namespace doc { // SHA1'd hash of a USR. using SymbolID = std::array<uint8_t, 20>; +constexpr SymbolID GlobalNamespaceID = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + struct BaseRecordInfo; struct EnumInfo; struct FunctionInfo; diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp index 843368e..aaf0594 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp @@ -40,8 +40,9 @@ SuspiciousIncludeCheck::SuspiciousIncludeCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), HeaderFileExtensions(Context->getHeaderFileExtensions()), - ImplementationFileExtensions(Context->getImplementationFileExtensions()) { -} + ImplementationFileExtensions(Context->getImplementationFileExtensions()), + IgnoredRegexString(Options.get("IgnoredRegex").value_or(StringRef{})), + IgnoredRegex(IgnoredRegexString) {} void SuspiciousIncludeCheck::registerPPCallbacks( const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { @@ -49,6 +50,11 @@ void SuspiciousIncludeCheck::registerPPCallbacks( ::std::make_unique<SuspiciousIncludePPCallbacks>(*this, SM, PP)); } +void SuspiciousIncludeCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + if (!IgnoredRegexString.empty()) + Options.store(Opts, "IgnoredRegex", 
IgnoredRegexString); +} + void SuspiciousIncludePPCallbacks::InclusionDirective( SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, @@ -57,6 +63,9 @@ void SuspiciousIncludePPCallbacks::InclusionDirective( if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import) return; + if (!Check.IgnoredRegexString.empty() && Check.IgnoredRegex.match(FileName)) + return; + SourceLocation DiagLoc = FilenameRange.getBegin().getLocWithOffset(1); const std::optional<StringRef> IFE = diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h index 3aa9491e..50fc345 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h @@ -10,7 +10,6 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSINCLUDECHECK_H #include "../ClangTidyCheck.h" -#include "../utils/FileExtensionsUtils.h" namespace clang::tidy::bugprone { @@ -28,9 +27,12 @@ public: SuspiciousIncludeCheck(StringRef Name, ClangTidyContext *Context); void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; FileExtensionsSet HeaderFileExtensions; FileExtensionsSet ImplementationFileExtensions; + StringRef IgnoredRegexString; + llvm::Regex IgnoredRegex; }; } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 9aeda03..216d3f5 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -286,6 +286,10 @@ Changes in existing checks <clang-tidy/checks/bugprone/sizeof-expression>` check by fixing a crash on ``sizeof`` of an array of dependent type. 
+- Improved :doc:`bugprone-suspicious-include + <clang-tidy/checks/bugprone/suspicious-include>` check by adding the + `IgnoredRegex` option. + - Improved :doc:`bugprone-tagged-union-member-count <clang-tidy/checks/bugprone/tagged-union-member-count>` by fixing a false positive when enums or unions from system header files or the ``std`` diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-include.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-include.rst index 669654f..4fbfa259 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-include.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-include.rst @@ -14,3 +14,11 @@ Examples: #include "Pterodactyl.h" // OK, .h files tend not to have definitions. #include "Velociraptor.cpp" // Warning, filename is suspicious. #include_next <stdio.c> // Warning, filename is suspicious. + +Options +------- + +.. option:: IgnoredRegex + + A regular expression for the file name to be ignored by the check. Default + is an empty string. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/moc_foo.cpp b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/moc_foo.cpp new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/moc_foo.cpp diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-include.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-include.cpp index 969d0bf..4f2acbc 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-include.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-include.cpp @@ -1,4 +1,6 @@ -// RUN: %check_clang_tidy %s bugprone-suspicious-include %t -- -- -isystem %clang_tidy_headers -fmodules +// RUN: %check_clang_tidy %s bugprone-suspicious-include %t -- \ +// RUN: -config="{CheckOptions: {bugprone-suspicious-include.IgnoredRegex: 'moc_.*'}"} -- \ +// RUN: -isystem %clang_tidy_headers -fmodules // clang-format off @@ -22,3 +24,6 @@ // CHECK-MESSAGES: [[@LINE+1]]:14: warning: suspicious #include of file with '.cxx' extension # include <c.cxx> + +// CHECK-MESSAGES-NOT: warning: +#include "moc_foo.cpp" diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h deleted file mode 100644 index e54fc26..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ /dev/null @@ -1,183 +0,0 @@ -//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the entry point for a dataflow-based static analysis -// that checks for C++ lifetime violations. 
-// -// The analysis is based on the concepts of "origins" and "loans" to track -// pointer lifetimes and detect issues like use-after-free and dangling -// pointers. See the RFC for more details: -// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/StringMap.h" -#include <memory> - -namespace clang::lifetimes { - -/// Enum to track the confidence level of a potential error. -enum class Confidence : uint8_t { - None, - Maybe, // Reported as a potential error (-Wlifetime-safety-strict) - Definite // Reported as a definite error (-Wlifetime-safety-permissive) -}; - -enum class LivenessKind : uint8_t { - Dead, // Not alive - Maybe, // Live on some path but not all paths (may-be-live) - Must // Live on all paths (must-be-live) -}; - -class LifetimeSafetyReporter { -public: - LifetimeSafetyReporter() = default; - virtual ~LifetimeSafetyReporter() = default; - - virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, - SourceLocation FreeLoc, - Confidence Confidence) {} -}; - -/// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - -namespace internal { -// Forward declarations of internal types. -class Fact; -class FactManager; -class LoanPropagationAnalysis; -class ExpiredLoansAnalysis; -class LiveOriginAnalysis; -struct LifetimeFactory; - -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. 
-template <typename Tag> struct ID { - uint32_t Value = 0; - - bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } - bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } - bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } - ID<Tag> operator++(int) { - ID<Tag> Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -using LoanID = ID<struct LoanTag>; -using OriginID = ID<struct OriginTag>; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { - return OS << ID.Value; -} -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { - return OS << ID.Value; -} - -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. -using LoanSet = llvm::ImmutableSet<LoanID>; -using OriginSet = llvm::ImmutableSet<OriginID>; -using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// Running the lifetime safety analysis and querying its results. It -/// encapsulates the various dataflow analyses. -class LifetimeSafetyAnalysis { -public: - LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - ~LifetimeSafetyAnalysis(); - - void run(); - - /// Returns the set of loans an origin holds at a specific program point. - LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; - - /// Returns the set of origins that are live at a specific program point, - /// along with the confidence level of their liveness. 
- /// - /// An origin is considered live if there are potential future uses of that - /// origin after the given program point. The confidence level indicates - /// whether the origin is definitely live (Definite) due to being domintated - /// by a set of uses or only possibly live (Maybe) only on some but not all - /// control flow paths. - std::vector<std::pair<OriginID, LivenessKind>> - getLiveOriginsAtPoint(ProgramPoint PP) const; - - /// Finds the OriginID for a given declaration. - /// Returns a null optional if not found. - std::optional<OriginID> getOriginIDForDecl(const ValueDecl *D) const; - - /// Finds the LoanID's for the loan created with the specific variable as - /// their Path. - std::vector<LoanID> getLoanIDForVar(const VarDecl *VD) const; - - /// Retrieves program points that were specially marked in the source code - /// for testing. - /// - /// The analysis recognizes special function calls of the form - /// `void("__lifetime_test_point_<name>")` as test points. This method returns - /// a map from the annotation string (<name>) to the corresponding - /// `ProgramPoint`. This allows test harnesses to query the analysis state at - /// user-defined locations in the code. - /// \note This is intended for testing only. 
- llvm::StringMap<ProgramPoint> getTestPoints() const; - -private: - AnalysisDeclContext &AC; - LifetimeSafetyReporter *Reporter; - std::unique_ptr<LifetimeFactory> Factory; - std::unique_ptr<FactManager> FactMgr; - std::unique_ptr<LoanPropagationAnalysis> LoanPropagation; - std::unique_ptr<LiveOriginAnalysis> LiveOrigins; -}; -} // namespace internal -} // namespace clang::lifetimes - -namespace llvm { -template <typename Tag> -struct DenseMapInfo<clang::lifetimes::internal::ID<Tag>> { - using ID = clang::lifetimes::internal::ID<Tag>; - - static inline ID getEmptyKey() { - return {DenseMapInfo<uint32_t>::getEmptyKey()}; - } - - static inline ID getTombstoneKey() { - return {DenseMapInfo<uint32_t>::getTombstoneKey()}; - } - - static unsigned getHashValue(const ID &Val) { - return DenseMapInfo<uint32_t>::getHashValue(Val.Value); - } - - static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } -}; -} // namespace llvm - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h new file mode 100644 index 0000000..03636be --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h @@ -0,0 +1,35 @@ +//===- Checker.h - C++ Lifetime Safety Analysis -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines and enforces the lifetime safety policy. It detects +// use-after-free errors by examining loan expiration points and checking if +// any live origins hold the expired loans. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" + +namespace clang::lifetimes::internal { + +/// Runs the lifetime checker, which detects use-after-free errors by +/// examining loan expiration points and checking if any live origins hold +/// the expired loan. +void runLifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter); + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h new file mode 100644 index 0000000..6a90aeb --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -0,0 +1,232 @@ +//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Facts, which are atomic lifetime-relevant events (such as +// loan issuance, loan expiration, origin flow, and use), and the FactManager, +// which manages the storage and retrieval of facts for each CFG block. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include <cstdint> + +namespace clang::lifetimes::internal { +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, + /// An origin escapes the function by flowing into the return value. + ReturnOfOrigin, + /// An origin is used (e.g., as an l-value expression like DeclRefExpr). + Use, + /// A marker for a specific point in the code, for testing. + TestPoint, + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template <typename T> const T *getAs() const { + if (T::classof(this)) + return static_cast<const T *>(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const; +}; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`, i.e., a lifetime-related event. 
+/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override; +}; + +class ExpireFact : public Fact { + LoanID LID; + SourceLocation ExpiryLoc; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override; +}; + +class OriginFlowFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. 
+ bool KillDest; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginFlow; + } + + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class UseFact : public Fact { + const Expr *UseExpr; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} + + OriginID getUsedOrigin(const OriginManager &OM) const { + // TODO: Remove const cast and make OriginManager::get as const. + return const_cast<OriginManager &>(OM).get(*UseExpr); + } + const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. 
+class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override; +}; + +class FactManager { +public: + llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { + if (!NewFacts.empty()) + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + + template <typename FactType, typename... Args> + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate<FactType>(); + return new (Mem) FactType(std::forward<Args>(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; + + /// Retrieves program points that were specially marked in the source code + /// for testing. + /// + /// The analysis recognizes special function calls of the form + /// `void("__lifetime_test_point_<name>")` as test points. This method returns + /// a map from the annotation string (<name>) to the corresponding + /// `ProgramPoint`. This allows test harnesses to query the analysis state at + /// user-defined locations in the code. + /// \note This is intended for testing only. 
+ llvm::StringMap<ProgramPoint> getTestPoints() const; + + LoanManager &getLoanMgr() { return LoanMgr; } + const LoanManager &getLoanMgr() const { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + const OriginManager &getOriginMgr() const { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h new file mode 100644 index 0000000..5e58abe --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -0,0 +1,106 @@ +//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FactsGenerator, which traverses the AST to generate +// lifetime-relevant facts (such as loan issuance, expiration, origin flow, +// and use) from CFG statements. These facts are used by the dataflow analyses +// to track pointer lifetimes and detect use-after-free errors. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H + +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang::lifetimes::internal { + +class FactsGenerator : public ConstStmtVisitor<FactsGenerator> { + using Base = ConstStmtVisitor<FactsGenerator>; + +public: + FactsGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run(); + + void VisitDeclStmt(const DeclStmt *DS); + void VisitDeclRefExpr(const DeclRefExpr *DRE); + void VisitCXXConstructExpr(const CXXConstructExpr *CCE); + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE); + void VisitCallExpr(const CallExpr *CE); + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N); + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE); + void VisitUnaryOperator(const UnaryOperator *UO); + void VisitReturnStmt(const ReturnStmt *RS); + void VisitBinaryOperator(const BinaryOperator *BO); + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); + void VisitInitListExpr(const InitListExpr *ILE); + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); + +private: + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt); + + void handleGSLPointerConstruction(const CXXConstructExpr *CCE); + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. 
+ void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef<const Expr *> Args, + bool IsGslConstruction = false); + + template <typename Destination, typename Source> + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template <typename Destination, typename Source> + void killAndFlowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact<OriginFlowFact>(DestOID, SrcOID, /*KillDest=*/true)); + } + + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool handleTestPoint(const CXXFunctionalCastExpr *FCE); + + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr); + + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). + void handleUse(const DeclRefExpr *DRE); + + void markUseAsWrite(const DeclRefExpr *DRE); + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector<Fact *> CurrentBlockFacts; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. 
+ llvm::DenseMap<const DeclRefExpr *, UseFact *> UseFacts; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h index 229d16c..f02969e 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h @@ -12,8 +12,7 @@ #include "clang/AST/DeclCXX.h" -namespace clang { -namespace lifetimes { +namespace clang ::lifetimes { /// Returns the most recent declaration of the method to ensure all /// lifetime-bound attributes from redeclarations are considered. @@ -38,7 +37,7 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); /// lifetimebound, either due to an explicit lifetimebound attribute on the /// method or because it's a normal assignment operator. bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); -} // namespace lifetimes -} // namespace clang + +} // namespace clang::lifetimes #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h new file mode 100644 index 0000000..91ffbb1 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -0,0 +1,87 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the main entry point and orchestrator for the C++ Lifetime +// Safety Analysis. 
It coordinates the entire analysis pipeline: fact +// generation, loan propagation, live origins analysis, and enforcement of +// lifetime safety policy. +// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" + +namespace clang::lifetimes { + +/// Enum to track the confidence level of a potential error. +enum class Confidence : uint8_t { + None, + Maybe, // Reported as a potential error (-Wlifetime-safety-strict) + Definite // Reported as a definite error (-Wlifetime-safety-permissive) +}; + +class LifetimeSafetyReporter { +public: + LifetimeSafetyReporter() = default; + virtual ~LifetimeSafetyReporter() = default; + + virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, + Confidence Confidence) {} +}; + +/// The main entry point for the analysis. +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + +namespace internal { +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. 
+struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFactory{/*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{/*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{/*canonicalize=*/false}; +}; + +/// Runs the lifetime safety analysis and exposes its results for querying. It +/// encapsulates the various dataflow analyses. +class LifetimeSafetyAnalysis { +public: + LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + + void run(); + + /// \note These are provided only for testing purposes. + LoanPropagationAnalysis &getLoanPropagation() const { + return *LoanPropagation; + } + LiveOriginsAnalysis &getLiveOrigins() const { return *LiveOrigins; } + FactManager &getFactManager() { return FactMgr; } + +private: + AnalysisDeclContext &AC; + LifetimeSafetyReporter *Reporter; + LifetimeFactory Factory; + FactManager FactMgr; + std::unique_ptr<LiveOriginsAnalysis> LiveOrigins; + std::unique_ptr<LoanPropagationAnalysis> LoanPropagation; +}; +} // namespace internal +} // namespace clang::lifetimes + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h new file mode 100644 index 0000000..c4f5f0e --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h @@ -0,0 +1,97 @@ +//===- LiveOrigins.h - Live Origins Analysis -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveOriginsAnalysis, a backward dataflow analysis that +// determines which origins are "live" at each program point.
An origin is +// "live" at a program point if there's a potential future use of a pointer it +// is associated with. Liveness is "generated" by a use of an origin (e.g., a +// `UseFact` from a read of a pointer) and is "killed" (i.e., it stops being +// live) when the origin is replaced by flowing a different origin into it +// (e.g., an OriginFlow from an assignment that kills the destination). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/Support/Debug.h" + +namespace clang::lifetimes::internal { + +enum class LivenessKind : uint8_t { + Dead, // Not alive + Maybe, // Live on some path but not all paths (may-be-live) + Must // Live on all paths (must-be-live) +}; + +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + const UseFact *CausingUseFact; + + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). + /// `Maybe`: indicates it is live on some but not all paths. 
+ /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(const UseFact *UF, LivenessKind K) + : CausingUseFact(UF), Kind(K) {} + + bool operator==(const LivenessInfo &Other) const { + return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } + + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingUseFact); + IDBuilder.Add(Kind); + } +}; + +using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>; + +class LiveOriginsAnalysis { +public: + LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF); + ~LiveOriginsAnalysis(); + + /// Returns the set of origins that are live at a specific program point, + /// along with the details of the liveness. + LivenessMap getLiveOriginsAt(ProgramPoint P) const; + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap<ProgramPoint> TestPoints) const; + +private: + class Impl; + std::unique_ptr<Impl> PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h new file mode 100644 index 0000000..447d05c --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h @@ -0,0 +1,48 @@ +//===- LoanPropagation.h - Loan Propagation Analysis -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoanPropagationAnalysis, a forward dataflow analysis +// that tracks which loans each origin holds at each program point. Loans +// represent borrows of storage locations and are propagated through the +// program as pointers are copied or assigned. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal { + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. 
+using LoanSet = llvm::ImmutableSet<LoanID>; +using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; + +class LoanPropagationAnalysis { +public: + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory); + ~LoanPropagationAnalysis(); + + LoanSet getLoans(OriginID OID, ProgramPoint P) const; + +private: + class Impl; + std::unique_ptr<Impl> PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h new file mode 100644 index 0000000..7f5cf03 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h @@ -0,0 +1,80 @@ +//===- Loans.h - Loan and Access Path Definitions --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Loan and AccessPath structures, which represent +// borrows of storage locations, and the LoanManager, which manages the +// creation and retrieval of loans during lifetime analysis. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H + +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang::lifetimes::internal { + +using LoanID = utils::ID<struct LoanTag>; +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { + return OS << ID.Value; +} + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is created. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it is UB! Currently + /// it is represented as an empty LoanSet. + LoanID ID; + AccessPath Path; + /// The expression that creates the loan, e.g., &x. + const Expr *IssueExpr; + + Loan(LoanID id, AccessPath path, const Expr *IssueExpr) + : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const; +}; + +/// Manages the creation, storage and retrieval of loans. +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { + AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefulness of small buffer + /// optimisation.
+ llvm::SmallVector<Loan> AllLoans; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h new file mode 100644 index 0000000..ba138b0 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -0,0 +1,91 @@ +//===- Origins.h - Origin and Origin Management ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Origins, which represent the set of possible loans a +// pointer-like object could hold, and the OriginManager, which manages the +// creation, storage, and retrieval of origins for variables and expressions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" + +namespace clang::lifetimes::internal { + +using OriginID = utils::ID<struct OriginTag>; + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { + return OS << ID.Value; +} + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. 
+/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast<const clang::ValueDecl *>(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast<const clang::Expr *>(); + } +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D); + Origin &addOrigin(OriginID ID, const clang::Expr &E); + + // TODO: Mark this method as const once we remove the call to getOrCreate. + OriginID get(const Expr &E); + + OriginID get(const ValueDecl &D); + + OriginID getOrCreate(const Expr &E); + + const Origin &getOrigin(OriginID ID) const; + + llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D); + + void dump(OriginID OID, llvm::raw_ostream &OS) const; + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefulness of small buffer + /// optimisation.
+ llvm::SmallVector<Origin> AllOrigins; + llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; + llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h new file mode 100644 index 0000000..4183cab --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h @@ -0,0 +1,118 @@ +//===- Utils.h - Utility Functions for Lifetime Safety --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file provides utilities for the lifetime safety analysis, including +// join operations for LLVM's immutable data structures. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H + +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal::utils { + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template <typename Tag> struct ID { + uint32_t Value = 0; + + bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } + bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } + bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } + ID<Tag> operator++(int) { + ID<Tag> Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +/// Computes the union of two ImmutableSets. 
+template <typename T> +static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, + llvm::ImmutableSet<T> B, + typename llvm::ImmutableSet<T>::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. +template <typename K, typename V, typename Joiner> +static llvm::ImmutableMap<K, V> +join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B, + typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues, + JoinKind Kind) { + if (A.getHeight() < B.getHeight()) + return join(B, A, F, JoinValues, Kind); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). 
+ llvm::ImmutableMap<K, V> Res = A; + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } + } + return Res; +} +} // namespace clang::lifetimes::internal::utils + +namespace llvm { +template <typename Tag> +struct DenseMapInfo<clang::lifetimes::internal::utils::ID<Tag>> { + using ID = clang::lifetimes::internal::utils::ID<Tag>; + + static inline ID getEmptyKey() { + return {DenseMapInfo<uint32_t>::getEmptyKey()}; + } + + static inline ID getTombstoneKey() { + return {DenseMapInfo<uint32_t>::getTombstoneKey()}; + } + + static unsigned getHashValue(const ID &Val) { + return DenseMapInfo<uint32_t>::getHashValue(Val.Value); + } + + static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } +}; +} // namespace llvm + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 40bc7b9..3df28f2 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13665,11 +13665,6 @@ def warn_acc_var_referenced_lacks_op "reference has no effect">, InGroup<DiagGroup<"openacc-var-lacks-operation">>, DefaultError; -def err_acc_reduction_recipe_no_op - : Error<"variable of type %0 referenced in OpenACC 'reduction' clause does " - "not have a valid operation available">; -def note_acc_reduction_recipe_noop_field - : Note<"while forming combiner for compound type %0">; // AMDGCN builtins diagnostics def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ec38231..c2f2ac5 
100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -545,15 +545,16 @@ multiclass BoolFOption<string flag_base, KeyPathAndMacro kpm, Group<f_Group>; } -// Creates a BoolOption where both of the flags are prefixed with "g" and have -// the Group<g_Group>. +// Creates a BoolOption where both of the flags are prefixed with "g". +// Does *not* map to g_Group, because that is reserved for flags that are +// intended to enable (or disable) debug info, which is not appropriate for a +// negative boolean flag (-gno-${feature}). // Used for -cc1 frontend options. Driver-only options do not map to // CompilerInvocation. multiclass BoolGOption<string flag_base, KeyPathAndMacro kpm, Default default, FlagDef flag1, FlagDef flag2, BothFlags both = BothFlags<[]>> { - defm NAME : BoolOption<"g", flag_base, kpm, default, flag1, flag2, both>, - Group<g_Group>; + defm NAME : BoolOption<"g", flag_base, kpm, default, flag1, flag2, both>; } multiclass BoolMOption<string flag_base, KeyPathAndMacro kpm, @@ -4845,8 +4846,7 @@ defm structor_decl_linkage_names NegFlag<SetFalse>, PosFlag<SetTrue, [], [], "Attach linkage names to C++ constructor/destructor " - "declarations in DWARF." - "Implies -g.">, + "declarations in DWARF.">, BothFlags<[], [ClangOption, CLOption, CC1Option]>>, DocBrief<[{On some ABIs (e.g., Itanium), constructors and destructors may have multiple variants. Historically, when generating DWARF, Clang did not attach ``DW_AT_linkage_name`` to structor DIEs because there were multiple possible manglings (depending on the structor variant) that could be used. With ``-gstructor-decl-linkage-names``, for ABIs with structor variants, we attach a "unified" mangled name to structor declarations DIEs which debuggers can use to look up all the definitions for a structor declaration. E.g., a "unified" mangled name ``_ZN3FooC4Ev`` may have multiple definitions associated with it such as ``_ZN3FooC1Ev`` and ``_ZN3FooC2Ev``. 
@@ -4855,7 +4855,7 @@ defm key_instructions : BoolGOption<"key-instructions", CodeGenOpts<"DebugKeyInstructions">, DefaultFalse, NegFlag<SetFalse>, PosFlag<SetTrue, [], [], "Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code" - " in some debuggers. DWARF only. Implies -g.">, + " in some debuggers. DWARF only.">, BothFlags<[], [ClangOption, CLOption, CC1Option]>>; def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">; def help : Flag<["-", "--"], "help">, diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 16e7f1b..6cadc34 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -228,11 +228,6 @@ private: bool DiagnoseAllowedClauses(OpenACCDirectiveKind DK, OpenACCClauseKind CK, SourceLocation ClauseLoc); - bool CreateReductionCombinerRecipe( - SourceLocation loc, OpenACCReductionOperator ReductionOperator, - QualType VarTy, - llvm::SmallVectorImpl<OpenACCReductionRecipe::CombinerRecipe> - &CombinerRecipes); public: // Needed from the visitor, so should be public. diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index c0be986..2f7ae6d 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -1670,20 +1670,25 @@ clang::getReplacedTemplateParameter(Decl *D, unsigned Index) { auto P = CTSD->getSpecializedTemplateOrPartial(); TemplateParameterList *TPL; if (const auto *CTPSD = - dyn_cast<ClassTemplatePartialSpecializationDecl *>(P)) + dyn_cast<ClassTemplatePartialSpecializationDecl *>(P)) { TPL = CTPSD->getTemplateParameters(); - else - TPL = cast<ClassTemplateDecl *>(P)->getTemplateParameters(); + // FIXME: Obtain Args deduced for the partial specialization. 
+ return {TPL->getParam(Index), {}}; + } + TPL = cast<ClassTemplateDecl *>(P)->getTemplateParameters(); return {TPL->getParam(Index), CTSD->getTemplateArgs()[Index]}; } case Decl::Kind::VarTemplateSpecialization: { const auto *VTSD = cast<VarTemplateSpecializationDecl>(D); auto P = VTSD->getSpecializedTemplateOrPartial(); TemplateParameterList *TPL; - if (const auto *VTPSD = dyn_cast<VarTemplatePartialSpecializationDecl *>(P)) + if (const auto *VTPSD = + dyn_cast<VarTemplatePartialSpecializationDecl *>(P)) { TPL = VTPSD->getTemplateParameters(); - else - TPL = cast<VarTemplateDecl *>(P)->getTemplateParameters(); + // FIXME: Obtain Args deduced for the partial specialization. + return {TPL->getParam(Index), {}}; + } + TPL = cast<VarTemplateDecl *>(P)->getTemplateParameters(); return {TPL->getParam(Index), VTSD->getTemplateArgs()[Index]}; } case Decl::Kind::ClassTemplatePartialSpecialization: diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 5a26f3e..1dbd415 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,8 +21,6 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp - LifetimeAnnotations.cpp - LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -51,3 +49,4 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) +add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp deleted file mode 100644 index 6196ec3..0000000 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ /dev/null @@ -1,1546 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include <cstdint> -#include <memory> -#include <optional> - -namespace clang::lifetimes { -namespace internal { - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. - /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - /// The expression that creates the loan, e.g., &x. 
- const Expr *IssueExpr; - - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} - - void dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; - OS << Path.D->getNameAsString() << ")"; - } -}; - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast<const clang::ValueDecl *>(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast<const clang::Expr *>(); - } -}; - -/// Manages the creation, storage and retrieval of loans. 
-class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector<Loan> AllLoans; -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); - } - Origin &addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); - } - - // TODO: Mark this method as const once we remove the call to getOrCreate. - OriginID get(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - // If the expression itself has no specific origin, and it's a reference - // to a declaration, its origin is that of the declaration it refers to. - // For pointer types, where we don't pre-emptively create an origin for the - // DeclRefExpr itself. - if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) - return get(*DRE->getDecl()); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - return getOrCreate(E); - } - - OriginID get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). 
The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; - } - - OriginID getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; - } - - const Origin &getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; - } - - llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; - } - - void dump(OriginID OID, llvm::raw_ostream &OS) const { - OS << OID << " ("; - Origin O = getOrigin(OID); - if (const ValueDecl *VD = O.getDecl()) - OS << "Decl: " << VD->getNameAsString(); - else if (const Expr *E = O.getExpr()) - OS << "Expr: " << E->getStmtClassName(); - else - OS << "Unknown"; - OS << ")"; - } - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector<Origin> AllOrigins; - llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; - llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; -}; - -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). 
- Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - /// This can also optionally kill the destination origin before flowing into - /// it. Otherwise, the source's loan set is merged into the destination's - /// loan set. - OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, - /// An origin is used (eg. appears as l-value expression like DeclRefExpr). - Use, - /// A marker for a specific point in the code, for testing. - TestPoint, - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template <typename T> const T *getAs() const { - if (T::classof(this)) - return static_cast<const T *>(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; - } -}; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const override { - OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ", ToOrigin: "; - OM.dump(getOriginID(), OS); - OS << ")\n"; - } -}; - -class ExpireFact : public Fact { - LoanID LID; - SourceLocation ExpiryLoc; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID, SourceLocation ExpiryLoc) - : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} - - LoanID getLoanID() const { return LID; } - SourceLocation getExpiryLoc() const { return ExpiryLoc; } - - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const 
override { - OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ")\n"; - } -}; - -class OriginFlowFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - // True if the destination origin should be killed (i.e., its current loans - // cleared) before the source origin's loans are flowed into it. - bool KillDest; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::OriginFlow; - } - - OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) - : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), - KillDest(KillDest) {} - - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - bool getKillDest() const { return KillDest; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "OriginFlow (Dest: "; - OM.dump(getDestOriginID(), OS); - OS << ", Src: "; - OM.dump(getSrcOriginID(), OS); - OS << (getKillDest() ? "" : ", Merge"); - OS << ")\n"; - } -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); - OS << ")\n"; - } -}; - -class UseFact : public Fact { - const Expr *UseExpr; - // True if this use is a write operation (e.g., left-hand side of assignment). - // Write operations are exempted from use-after-free checks. 
- bool IsWritten = false; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. - return const_cast<OriginManager &>(OM).get(*UseExpr); - } - const Expr *getUseExpr() const { return UseExpr; } - void markAsWritten() { IsWritten = true; } - bool isWritten() const { return IsWritten; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); - OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; - } -}; - -/// A dummy-fact used to mark a specific point in the code for testing. -/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. -class TestPointFact : public Fact { - StringRef Annotation; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } - - explicit TestPointFact(StringRef Annotation) - : Fact(Kind::TestPoint), Annotation(Annotation) {} - - StringRef getAnnotation() const { return Annotation; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const override { - OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; - } -}; - -class FactManager { -public: - llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template <typename FactType, typename... 
Args> - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate<FactType>(); - return new (Mem) FactType(std::forward<Args>(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast<NamedDecl>(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. - for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } - } - llvm::dbgs() << " End of Block\n"; - } - } - - LoanManager &getLoanMgr() { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; - -class FactGenerator : public ConstStmtVisitor<FactGenerator> { - using Base = ConstStmtVisitor<FactGenerator>; - -public: - FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. 
- for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) - Visit(CS->getStmt()); - else if (std::optional<CFGAutomaticObjDtor> DtorOpt = - Element.getAs<CFGAutomaticObjDtor>()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } - } - - void VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast<VarDecl>(D)) - if (hasOrigin(VD)) - if (const Expr *InitExpr = VD->getInit()) - killAndFlowOrigin(*VD, *InitExpr); - } - - void VisitDeclRefExpr(const DeclRefExpr *DRE) { - handleUse(DRE); - // For non-pointer/non-view types, a reference to the variable's storage - // is a borrow. We create a loan for it. - // For pointer/view types, we stick to the existing model for now and do - // not create an extra origin for the l-value expression itself. - - // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is - // not sufficient to model the different levels of indirection. The current - // single-origin model cannot distinguish between a loan to the variable's - // storage and a loan to what it points to. A multi-origin model would be - // required for this. 
- if (!isPointerType(DRE->getType())) { - if (const Loan *L = createLoan(DRE)) { - OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); - CurrentBlockFacts.push_back( - FactMgr.createFact<IssueFact>(L->ID, ExprOID)); - } - } - } - - void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { - if (isGslPointerType(CCE->getType())) { - handleGSLPointerConstruction(CCE); - return; - } - } - - void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { - // Specifically for conversion operators, - // like `std::string_view p = std::string{};` - if (isGslPointerType(MCE->getType()) && - isa<CXXConversionDecl>(MCE->getCalleeDecl())) { - // The argument is the implicit object itself. - handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}, - /*IsGslConstruction=*/true); - } - if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { - // Construct the argument list, with the implicit 'this' object as the - // first argument. - llvm::SmallVector<const Expr *, 4> Args; - Args.push_back(MCE->getImplicitObjectArgument()); - Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); - - handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); - } - } - - void VisitCallExpr(const CallExpr *CE) { - handleFunctionCall(CE, CE->getDirectCallee(), - {CE->getArgs(), CE->getNumArgs()}); - } - - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. - FactMgr.getOriginMgr().getOrCreate(*N); - } - - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE)) - return; - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). 
- killAndFlowOrigin(*ICE, *ICE->getSubExpr()); - } - - void VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - // Taking address of a pointer-type expression is not yet supported and - // will be supported in multi-origin model. - if (isPointerType(SubExpr->getType())) - return; - // The origin of an address-of expression (e.g., &x) is the origin of - // its sub-expression (x). This fact will cause the dataflow analysis - // to propagate any loans held by the sub-expression's origin to the - // origin of this UnaryOperator expression. - killAndFlowOrigin(*UO, *SubExpr); - } - } - - void VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr)) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back( - FactMgr.createFact<ReturnOfOriginFact>(OID)); - } - } - } - - void VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) - handleAssignment(BO->getLHS(), BO->getRHS()); - } - - void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - // Assignment operators have special "kill-then-propagate" semantics - // and are handled separately. - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { - handleAssignment(OCE->getArg(0), OCE->getArg(1)); - return; - } - handleFunctionCall(OCE, OCE->getDirectCallee(), - {OCE->getArgs(), OCE->getNumArgs()}, - /*IsGslConstruction=*/false); - } - - void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { - // Check if this is a test point marker. If so, we are done with this - // expression. 
- if (handleTestPoint(FCE)) - return; - if (isGslPointerType(FCE->getType())) - killAndFlowOrigin(*FCE, *FCE->getSubExpr()); - } - - void VisitInitListExpr(const InitListExpr *ILE) { - if (!hasOrigin(ILE)) - return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) - killAndFlowOrigin(*ILE, *ILE->getInit(0)); - } - - void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { - if (!hasOrigin(MTE)) - return; - // A temporary object's origin is the same as the origin of the - // expression that initializes it. - killAndFlowOrigin(*MTE, *MTE->getSubExpr()); - } - - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); - } - } - -private: - static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. 
- if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr<PointerAttr>(); - return RD->hasAttr<PointerAttr>(); - } - return false; - } - - static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType() || isGslPointerType(QT); - } - // Check if a type has an origin. - static bool hasOrigin(const Expr *E) { - return E->isGLValue() || isPointerType(E->getType()); - } - - static bool hasOrigin(const VarDecl *VD) { - return isPointerType(VD->getType()); - } - - void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { - assert(isGslPointerType(CCE->getType())); - if (CCE->getNumArgs() != 1) - return; - if (hasOrigin(CCE->getArg(0))) - killAndFlowOrigin(*CCE, *CCE->getArg(0)); - else - // This could be a new borrow. - handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}, - /*IsGslConstruction=*/true); - } - - /// Checks if a call-like expression creates a borrow by passing a value to a - /// reference parameter, creating an IssueFact if it does. - /// \param IsGslConstruction True if this is a GSL construction where all - /// argument origins should flow to the returned origin. - void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef<const Expr *> Args, - bool IsGslConstruction = false) { - // Ignore functions returning values with no origin. - if (!FD || !hasOrigin(Call)) - return; - auto IsArgLifetimeBound = [FD](unsigned I) -> bool { - const ParmVarDecl *PVD = nullptr; - if (const auto *Method = dyn_cast<CXXMethodDecl>(FD); - Method && Method->isInstance()) { - if (I == 0) - // For the 'this' argument, the attribute is on the method itself. - return implicitObjectParamIsLifetimeBound(Method); - if ((I - 1) < Method->getNumParams()) - // For explicit arguments, find the corresponding parameter - // declaration. 
- PVD = Method->getParamDecl(I - 1); - } else if (I < FD->getNumParams()) - // For free functions or static methods. - PVD = FD->getParamDecl(I); - return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false; - }; - if (Args.empty()) - return; - bool killedSrc = false; - for (unsigned I = 0; I < Args.size(); ++I) - if (IsGslConstruction || IsArgLifetimeBound(I)) { - if (!killedSrc) { - killedSrc = true; - killAndFlowOrigin(*Call, *Args[I]); - } else - flowOrigin(*Call, *Args[I]); - } - } - - /// Creates a loan for the storage path of a given declaration reference. - /// This function should be called whenever a DeclRefExpr represents a borrow. - /// \param DRE The declaration reference expression that initiates the borrow. - /// \return The new Loan on success, nullptr otherwise. - const Loan *createLoan(const DeclRefExpr *DRE) { - if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { - AccessPath Path(VD); - // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); - } - return nullptr; - } - - template <typename Destination, typename Source> - void flowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>( - DestOID, SrcOID, /*KillDest=*/false)); - } - - template <typename Destination, typename Source> - void killAndFlowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact<OriginFlowFact>(DestOID, SrcOID, /*KillDest=*/true)); - } - - /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. - /// If so, creates a `TestPointFact` and returns true. 
- bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { - if (!FCE->getType()->isVoidType()) - return false; - - const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { - llvm::StringRef LiteralValue = SL->getString(); - const std::string Prefix = "__lifetime_test_point_"; - - if (LiteralValue.starts_with(Prefix)) { - StringRef Annotation = LiteralValue.drop_front(Prefix.length()); - CurrentBlockFacts.push_back( - FactMgr.createFact<TestPointFact>(Annotation)); - return true; - } - } - return false; - } - - void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { - if (!hasOrigin(LHSExpr)) - return; - // Find the underlying variable declaration for the left-hand side. - if (const auto *DRE_LHS = - dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { - markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) { - // Kill the old loans of the destination origin and flow the new loans - // from the source origin. - killAndFlowOrigin(*VD_LHS, *RHSExpr); - } - } - } - - // A DeclRefExpr will be treated as a use of the referenced decl. It will be - // checked for use-after-free unless it is later marked as being written to - // (e.g. on the left-hand side of an assignment). - void handleUse(const DeclRefExpr *DRE) { - if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact<UseFact>(DRE); - CurrentBlockFacts.push_back(UF); - assert(!UseFacts.contains(DRE)); - UseFacts[DRE] = UF; - } - } - - void markUseAsWrite(const DeclRefExpr *DRE) { - if (!isPointerType(DRE->getType())) - return; - assert(UseFacts.contains(DRE)); - UseFacts[DRE]->markAsWritten(); - } - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector<Fact *> CurrentBlockFacts; - // To distinguish between reads and writes for use-after-free checks, this map - // stores the `UseFact` for each `DeclRefExpr`. We initially identify all - // `DeclRefExpr`s as "read" uses. 
When an assignment is processed, the use - // corresponding to the left-hand side is updated to be a "write", thereby - // exempting it from the check. - llvm::DenseMap<const DeclRefExpr *, UseFact *> UseFacts; -}; - -// ========================================================================= // -// Generic Dataflow Analysis -// ========================================================================= // - -enum class Direction { Forward, Backward }; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// A generic, policy-based driver for dataflow analyses. It combines -/// the dataflow runner and the transferer logic into a single class hierarchy. -/// -/// The derived class is expected to provide: -/// - A `Lattice` type. -/// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state of the analysis. -/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. -/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single -/// lifetime-relevant `Fact` transforms the lattice state. Only overloads -/// for facts relevant to the analysis need to be implemented. -/// -/// \tparam Derived The CRTP derived class that implements the specific -/// analysis. -/// \tparam LatticeType The dataflow lattice used by the analysis. -/// \tparam Dir The direction of the analysis (Forward or Backward). -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. 
-template <typename Derived, typename LatticeType, Direction Dir> -class DataflowAnalysis { -public: - using Lattice = LatticeType; - using Base = DataflowAnalysis<Derived, Lattice, Dir>; - -private: - const CFG &Cfg; - AnalysisDeclContext &AC; - - /// The dataflow state before a basic block is processed. - llvm::DenseMap<const CFGBlock *, Lattice> InStates; - /// The dataflow state after a basic block is processed. - llvm::DenseMap<const CFGBlock *, Lattice> OutStates; - /// The dataflow state at a Program Point. - /// In a forward analysis, this is the state after the Fact at that point has - /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; - - static constexpr bool isForward() { return Dir == Direction::Forward; } - -protected: - FactManager &AllFacts; - - explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, - FactManager &F) - : Cfg(C), AC(AC), AllFacts(F) {} - -public: - void run() { - Derived &D = static_cast<Derived &>(*this); - llvm::TimeTraceScope Time(D.getAnalysisName()); - - using Worklist = - std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist, - BackwardDataflowWorklist>; - Worklist W(Cfg, AC); - - const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); - InStates[Start] = D.getInitialState(); - W.enqueueBlock(Start); - - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = *getInState(B); - Lattice StateOut = transferBlock(B, StateIn); - OutStates[B] = StateOut; - for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { - if (!AdjacentB) - continue; - std::optional<Lattice> OldInState = getInState(AdjacentB); - Lattice NewInState = - !OldInState ? StateOut : D.join(*OldInState, StateOut); - // Enqueue the adjacent block if its in-state has changed or if we have - // never seen it. 
- if (!OldInState || NewInState != *OldInState) { - InStates[AdjacentB] = NewInState; - W.enqueueBlock(AdjacentB); - } - } - } - } - -protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - - std::optional<Lattice> getInState(const CFGBlock *B) const { - auto It = InStates.find(B); - if (It == InStates.end()) - return std::nullopt; - return It->second; - } - - Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } - - void dump() const { - const Derived *D = static_cast<const Derived *>(this); - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << D->getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); - getOutState(&B).dump(llvm::dbgs()); - } - -private: - /// Computes the state at one end of a block by applying all its facts - /// sequentially to a given state from the other end. - Lattice transferBlock(const CFGBlock *Block, Lattice State) { - auto Facts = AllFacts.getFacts(Block); - if constexpr (isForward()) { - for (const Fact *F : Facts) { - State = transferFact(State, F); - PerPointStates[F] = State; - } - } else { - for (const Fact *F : llvm::reverse(Facts)) { - // In backward analysis, capture the state before applying the fact. 
- PerPointStates[F] = State; - State = transferFact(State, F); - } - } - return State; - } - - Lattice transferFact(Lattice In, const Fact *F) { - assert(F); - Derived *D = static_cast<Derived *>(this); - switch (F->getKind()) { - case Fact::Kind::Issue: - return D->transfer(In, *F->getAs<IssueFact>()); - case Fact::Kind::Expire: - return D->transfer(In, *F->getAs<ExpireFact>()); - case Fact::Kind::OriginFlow: - return D->transfer(In, *F->getAs<OriginFlowFact>()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); - case Fact::Kind::Use: - return D->transfer(In, *F->getAs<UseFact>()); - case Fact::Kind::TestPoint: - return D->transfer(In, *F->getAs<TestPointFact>()); - } - llvm_unreachable("Unknown fact kind"); - } - -public: - Lattice transfer(Lattice In, const IssueFact &) { return In; } - Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } - Lattice transfer(Lattice In, const UseFact &) { return In; } - Lattice transfer(Lattice In, const TestPointFact &) { return In; } -}; - -namespace utils { - -/// Computes the union of two ImmutableSets. -template <typename T> -static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, - llvm::ImmutableSet<T> B, - typename llvm::ImmutableSet<T>::Factory &F) { - if (A.getHeight() < B.getHeight()) - std::swap(A, B); - for (const T &E : B) - A = F.add(A, E); - return A; -} - -/// Describes the strategy for joining two `ImmutableMap` instances, primarily -/// differing in how they handle keys that are unique to one of the maps. -/// -/// A `Symmetric` join is universally correct, while an `Asymmetric` join -/// serves as a performance optimization. The latter is applicable only when the -/// join operation possesses a left identity element, allowing for a more -/// efficient, one-sided merge. 
-enum class JoinKind { - /// A symmetric join applies the `JoinValues` operation to keys unique to - /// either map, ensuring that values from both maps contribute to the result. - Symmetric, - /// An asymmetric join preserves keys unique to the first map as-is, while - /// applying the `JoinValues` operation only to keys unique to the second map. - Asymmetric, -}; - -/// Computes the key-wise union of two ImmutableMaps. -// TODO(opt): This key-wise join is a performance bottleneck. A more -// efficient merge could be implemented using a Patricia Trie or HAMT -// instead of the current AVL-tree-based ImmutableMap. -template <typename K, typename V, typename Joiner> -static llvm::ImmutableMap<K, V> -join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B, - typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues, - JoinKind Kind) { - if (A.getHeight() < B.getHeight()) - return join(B, A, F, JoinValues, Kind); - - // For each element in B, join it with the corresponding element in A - // (or with an empty value if it doesn't exist in A). - llvm::ImmutableMap<K, V> Res = A; - for (const auto &Entry : B) { - const K &Key = Entry.first; - const V &ValB = Entry.second; - Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); - } - if (Kind == JoinKind::Symmetric) { - for (const auto &Entry : A) { - const K &Key = Entry.first; - const V &ValA = Entry.second; - if (!B.contains(Key)) - Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); - } - } - return Res; -} -} // namespace utils - -// ========================================================================= // -// Loan Propagation Analysis -// ========================================================================= // - -/// Represents the dataflow lattice for loan propagation. -/// -/// This lattice tracks which loans each origin may hold at a given program -/// point.The lattice has a finite height: An origin's loan set is bounded by -/// the total number of loans in the function. 
-/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. -struct LoanPropagationLattice { - /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} - LoanPropagationLattice() = default; - - bool operator==(const LoanPropagationLattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const LoanPropagationLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS) const { - OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) - OS << " <empty>\n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -/// The analysis that tracks which loans belong to which origins. -class LoanPropagationAnalysis - : public DataflowAnalysis<LoanPropagationAnalysis, LoanPropagationLattice, - Direction::Forward> { - OriginLoanMap::Factory &OriginLoanMapFactory; - LoanSet::Factory &LoanSetFactory; - -public: - LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} - - using Base::transfer; - - StringRef getAnalysisName() const { return "LoanPropagation"; } - - Lattice getInitialState() { return Lattice{}; } - - /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. 
- Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, - [&](const LoanSet *S1, const LoanSet *S2) { - assert((S1 || S2) && "unexpectedly merging 2 empty sets"); - if (!S1) - return *S2; - if (!S2) - return *S1; - return utils::join(*S1, *S2, LoanSetFactory); - }, - // Asymmetric join is a performance win. For origins present only on one - // branch, the loan set can be carried over as-is. - utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); - } - - /// A new loan is issued to the origin. Old loans are erased. - Lattice transfer(Lattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return LoanPropagationLattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); - } - - /// A flow from source to destination. If `KillDest` is true, this replaces - /// the destination's loans with the source's. Otherwise, the source's loans - /// are merged into the destination's. - Lattice transfer(Lattice In, const OriginFlowFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - - LoanSet DestLoans = - F.getKillDest() ? 
LoanSetFactory.getEmptySet() : getLoans(In, DestOID); - LoanSet SrcLoans = getLoans(In, SrcOID); - LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - - return LoanPropagationLattice( - OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); - } - - LoanSet getLoans(OriginID OID, ProgramPoint P) const { - return getLoans(getState(P), OID); - } - -private: - LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) - return *Loans; - return LoanSetFactory.getEmptySet(); - } -}; - -// ========================================================================= // -// Live Origins Analysis -// ========================================================================= // -// -// A backward dataflow analysis that determines which origins are "live" at each -// program point. An origin is "live" at a program point if there's a potential -// future use of the pointer it represents. Liveness is "generated" by a read of -// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being -// live) when its loan set is overwritten (e.g. a OriginFlow killing the -// destination origin). -// -// This information is used for detecting use-after-free errors, as it allows us -// to check if a live origin holds a loan to an object that has already expired. -// ========================================================================= // - -/// Information about why an origin is live at a program point. -struct LivenessInfo { - /// The use that makes the origin live. If liveness is propagated from - /// multiple uses along different paths, this will point to the use appearing - /// earlier in the translation unit. - /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; - /// The kind of liveness of the origin. - /// `Must`: The origin is live on all control-flow paths from the current - /// point to the function's exit (i.e. the current point is dominated by a set - /// of uses). 
- /// `Maybe`: indicates it is live on some but not all paths. - /// - /// This determines the diagnostic's confidence level. - /// `Must`-be-alive at expiration implies a definite use-after-free, - /// while `Maybe`-be-alive suggests a potential one on some paths. - LivenessKind Kind; - - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} - - bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; - } - bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); - IDBuilder.Add(Kind); - } -}; - -using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>; - -/// The dataflow lattice for origin liveness analysis. -/// It tracks which origins are live, why they're live (which UseFact), -/// and the confidence level of that liveness. 
-struct LivenessLattice { - LivenessMap LiveOrigins; - - LivenessLattice() : LiveOrigins(nullptr) {}; - - explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} - - bool operator==(const LivenessLattice &Other) const { - return LiveOrigins == Other.LiveOrigins; - } - - bool operator!=(const LivenessLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { - if (LiveOrigins.isEmpty()) - OS << " <empty>\n"; - for (const auto &Entry : LiveOrigins) { - OriginID OID = Entry.first; - const LivenessInfo &Info = Entry.second; - OS << " "; - OM.dump(OID, OS); - OS << " is "; - switch (Info.Kind) { - case LivenessKind::Must: - OS << "definitely"; - break; - case LivenessKind::Maybe: - OS << "maybe"; - break; - case LivenessKind::Dead: - llvm_unreachable("liveness kind of live origins should not be dead."); - } - OS << " live at this point\n"; - } - } -}; - -/// The analysis that tracks which origins are live, with granular information -/// about the causing use fact and confidence level. This is a backward -/// analysis. -class LiveOriginAnalysis - : public DataflowAnalysis<LiveOriginAnalysis, LivenessLattice, - Direction::Backward> { - FactManager &FactMgr; - LivenessMap::Factory &Factory; - -public: - LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF) - : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} - using DataflowAnalysis<LiveOriginAnalysis, Lattice, - Direction::Backward>::transfer; - - StringRef getAnalysisName() const { return "LiveOrigins"; } - - Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - - /// Merges two lattices by combining liveness information. - /// When the same origin has different confidence levels, we take the lower - /// one. - Lattice join(Lattice L1, Lattice L2) const { - LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. 
- auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; - }; - auto CombineLivenessKind = [](LivenessKind K1, - LivenessKind K2) -> LivenessKind { - assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); - assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); - // Only return "Must" if both paths are "Must", otherwise Maybe. - if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) - return LivenessKind::Must; - return LivenessKind::Maybe; - }; - auto CombineLivenessInfo = [&](const LivenessInfo *L1, - const LivenessInfo *L2) -> LivenessInfo { - assert((L1 || L2) && "unexpectedly merging 2 empty sets"); - if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); - if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); - }; - return Lattice(utils::join( - L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, - // A symmetric join is required here. If an origin is live on one - // branch but not the other, its confidence must be demoted to `Maybe`. - utils::JoinKind::Symmetric)); - } - - /// A read operation makes the origin live with definite confidence, as it - /// dominates this program point. A write operation kills the liveness of - /// the origin since it overwrites the value. - Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); - // Write kills liveness. - if (UF.isWritten()) - return Lattice(Factory.remove(In.LiveOrigins, OID)); - // Read makes origin live with definite confidence (dominates this point). - return Lattice(Factory.add(In.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } - - /// Issuing a new loan to an origin kills its liveness. 
- Lattice transfer(Lattice In, const IssueFact &IF) { - return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); - } - - /// An OriginFlow kills the liveness of the destination origin if `KillDest` - /// is true. Otherwise, it propagates liveness from destination to source. - Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); - } - - LivenessMap getLiveOrigins(ProgramPoint P) const { - return getState(P).LiveOrigins; - } - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - for (const auto &Entry : LSA.getTestPoints()) { - OS << "TestPoint: " << Entry.getKey() << "\n"; - getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); - } - } -}; - -// ========================================================================= // -// Lifetime checker and Error reporter -// ========================================================================= // - -/// Struct to store the complete context for a potential lifetime violation. -struct PendingWarning { - SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. 
- Confidence ConfidenceLevel; -}; - -class LifetimeChecker { -private: - llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap; - LoanPropagationAnalysis &LoanPropagation; - LiveOriginAnalysis &LiveOrigins; - FactManager &FactMgr; - AnalysisDeclContext &ADC; - LifetimeSafetyReporter *Reporter; - -public: - LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, - FactManager &FM, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter) - : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), - Reporter(Reporter) {} - - void run() { - llvm::TimeTraceScope TimeProfile("LifetimeChecker"); - for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>()) - for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *EF = F->getAs<ExpireFact>()) - checkExpiry(EF); - issuePendingWarnings(); - } - - /// Checks for use-after-free errors when a loan expires. - /// - /// This method examines all live origins at the expiry point and determines - /// if any of them hold the expiring loan. If so, it creates a pending - /// warning with the appropriate confidence level based on the liveness - /// information. The confidence reflects whether the origin is definitely - /// or maybe live at this point. - /// - /// Note: This implementation considers only the confidence of origin - /// liveness. Future enhancements could also consider the confidence of loan - /// propagation (e.g., a loan may only be held on some execution paths). - void checkExpiry(const ExpireFact *EF) { - LoanID ExpiredLoan = EF->getLoanID(); - LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); - Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; - for (auto &[OID, LiveInfo] : Origins) { - LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); - if (!HeldLoans.contains(ExpiredLoan)) - continue; - // Loan is defaulted. 
- Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); - if (CurConfidence < NewConfidence) { - CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; - } - } - if (!BadUse) - return; - // We have a use-after-free. - Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; - if (LastConf >= CurConfidence) - return; - FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), - /*ConfidenceLevel=*/CurConfidence}; - } - - static Confidence livenessKindToConfidence(LivenessKind K) { - switch (K) { - case LivenessKind::Must: - return Confidence::Definite; - case LivenessKind::Maybe: - return Confidence::Maybe; - case LivenessKind::Dead: - return Confidence::None; - } - llvm_unreachable("unknown liveness kind"); - } - - void issuePendingWarnings() { - if (!Reporter) - return; - for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); - } - } -}; - -// ========================================================================= // -// LifetimeSafetyAnalysis Class Implementation -// ========================================================================= // - -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - llvm::BumpPtrAllocator Allocator; - OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; - LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; -}; - -// We need this here for unique_ptr with forward declared class. 
-LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; - -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) - : AC(AC), Reporter(Reporter), Factory(std::make_unique<LifetimeFactory>()), - FactMgr(std::make_unique<FactManager>()) {} - -void LifetimeSafetyAnalysis::run() { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - - const CFG &Cfg = *AC.getCFG(); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - - FactGenerator FactGen(*FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - /// 3. Collapse ExpireFacts belonging to same source location into a single - /// Fact. 
- LoanPropagation = std::make_unique<LoanPropagationAnalysis>( - Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); - LoanPropagation->run(); - - LiveOrigins = std::make_unique<LiveOriginAnalysis>( - Cfg, AC, *FactMgr, Factory->LivenessMapFactory); - LiveOrigins->run(); - DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); - - LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, - Reporter); - Checker.run(); -} - -LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, - ProgramPoint PP) const { - assert(LoanPropagation && "Analysis has not been run."); - return LoanPropagation->getLoans(OID, PP); -} - -std::optional<OriginID> -LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { - assert(FactMgr && "FactManager not initialized"); - // This assumes the OriginManager's `get` can find an existing origin. - // We might need a `find` method on OriginManager to avoid `getOrCreate` logic - // in a const-query context if that becomes an issue. 
- return FactMgr->getOriginMgr().get(*D); -} - -std::vector<LoanID> -LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { - assert(FactMgr && "FactManager not initialized"); - std::vector<LoanID> Result; - for (const Loan &L : FactMgr->getLoanMgr().getLoans()) - if (L.Path.D == VD) - Result.push_back(L.ID); - return Result; -} - -std::vector<std::pair<OriginID, LivenessKind>> -LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { - assert(LiveOrigins && "LiveOriginAnalysis has not been run."); - std::vector<std::pair<OriginID, LivenessKind>> Result; - for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) - Result.push_back({OID, Info.Kind}); - return Result; -} - -llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const { - assert(FactMgr && "FactManager not initialized"); - llvm::StringMap<ProgramPoint> AnnotationToPointMap; - for (const CFGBlock *Block : *AC.getCFG()) { - for (const Fact *F : FactMgr->getFacts(Block)) { - if (const auto *TPF = F->getAs<TestPointFact>()) { - StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && - "more than one test points with the same name"); - AnnotationToPointMap[PointName] = F; - } - } - } - return AnnotationToPointMap; -} -} // namespace internal - -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); -} -} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0000000..5874e84 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,17 @@ +add_clang_library(clangAnalysisLifetimeSafety + Checker.cpp + Facts.cpp + FactsGenerator.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp + LiveOrigins.cpp + Loans.cpp + LoanPropagation.cpp + Origins.cpp + + 
LINK_LIBS + clangAST + clangAnalysis + clangBasic + ) + diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp new file mode 100644 index 0000000..c443c3a --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -0,0 +1,130 @@ +//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LifetimeChecker, which detects use-after-free +// errors by checking if live origins hold loans that have expired. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); +} + +namespace { + +/// Struct to store the complete context for a potential lifetime violation. 
+struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap; + const LoanPropagationAnalysis &LoanPropagation; + const LiveOriginsAnalysis &LiveOrigins; + const FactManager &FactMgr; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, + AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) + : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), + Reporter(Reporter) { + for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs<ExpireFact>()) + checkExpiry(EF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. 
+ Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } + } + if (!BadUse) + return; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; +} // namespace + +void runLifetimeChecker(const LoanPropagationAnalysis &LP, + const LiveOriginsAnalysis &LO, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h new file mode 100644 index 0000000..2f7bcb6 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -0,0 +1,188 @@ +//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic, policy-based driver for dataflow analyses. 
+// It provides a flexible framework that combines the dataflow runner and +// transfer functions, allowing derived classes to implement specific analyses +// by defining their lattice, join, and transfer functions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include <optional> + +namespace clang::lifetimes::internal { + +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`, i.e. a lifetime-related event recorded for a CFG block. +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. 
+/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template <typename Derived, typename LatticeType, Direction Dir> +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis<Derived, Lattice, Dir>; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap<const CFGBlock *, Lattice> InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap<const CFGBlock *, Lattice> OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &FactMgr; + + explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, + FactManager &FactMgr) + : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} + +public: + void run() { + Derived &D = static_cast<Derived &>(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + using Worklist = + std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist, + BackwardDataflowWorklist>; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional<Lattice> OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? 
StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + + std::optional<Lattice> getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast<const Derived *>(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = FactMgr.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. 
+ PerPointStates[F] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast<Derived *>(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs<IssueFact>()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs<ExpireFact>()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs<OriginFlowFact>()); + case Fact::Kind::ReturnOfOrigin: + return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs<UseFact>()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs<TestPointFact>()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; +} // namespace clang::lifetimes::internal +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp new file mode 100644 index 0000000..1aea64f --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -0,0 +1,102 @@ +//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" + +namespace clang::lifetimes::internal { + +void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; +} + +void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; +} + +void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; +} + +void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; +} + +void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; +} + +void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "Use ("; + OM.dump(getUsedOrigin(OM), OS); + OS << ", " << (isWritten() ? 
"Write" : "Read") << ")\n"; +} + +void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; +} + +llvm::StringMap<ProgramPoint> FactManager::getTestPoints() const { + llvm::StringMap<ProgramPoint> AnnotationToPointMap; + for (const CFGBlock *Block : BlockToFactsMap.keys()) { + for (const Fact *F : getFacts(Block)) { + if (const auto *TPF = F->getAs<TestPointFact>()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} + +void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast<NamedDecl>(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. 
+ for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + } + llvm::dbgs() << " End of Block\n"; + } +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp new file mode 100644 index 0000000..485308f --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -0,0 +1,348 @@ +//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr<PointerAttr>(); + return RD->hasAttr<PointerAttr>(); + } + return false; +} + +static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); +} +// Check if a type has an origin. 
+static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); +} + +static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); +} + +/// Creates a loan for the storage path of a given declaration reference. +/// This function should be called whenever a DeclRefExpr represents a borrow. +/// \param DRE The declaration reference expression that initiates the borrow. +/// \return The new Loan on success, nullptr otherwise. +static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; +} + +void FactsGenerator::run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) + Visit(CS->getStmt()); + else if (std::optional<CFGAutomaticObjDtor> DtorOpt = + Element.getAs<CFGAutomaticObjDtor>()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } +} + +void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast<VarDecl>(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); +} + +void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. 
+ // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(FactMgr, DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact<IssueFact>(L->ID, ExprOID)); + } + } +} + +void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } +} + +void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Conversion operators, e.g. `std::string_view p = std::string{};`. + // getCalleeDecl() may return null, hence `isa_and_present`. + if (isGslPointerType(MCE->getType()) && + isa_and_present<CXXConversionDecl>(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. 
+ llvm::SmallVector<const Expr *, 4> Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } +} + +void FactsGenerator::VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); +} + +void FactsGenerator::VisitCXXNullPtrLiteralExpr( + const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); +} + +void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); +} + +void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. 
+ killAndFlowOrigin(*UO, *SubExpr); + } +} + +void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back(FactMgr.createFact<ReturnOfOriginFact>(OID)); + } + } +} + +void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); +} + +void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); +} + +void FactsGenerator::VisitCXXFunctionalCastExpr( + const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); +} + +void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); +} + +void FactsGenerator::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. 
+ killAndFlowOrigin(*MTE, *MTE->getSubExpr()); +} + +void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + } +} + +void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); +} + +/// Checks if a call-like expression creates a borrow by passing a value to a +/// reference parameter, creating an IssueFact if it does. +/// \param IsGslConstruction True if this is a GSL construction where all +/// argument origins should flow to the returned origin. +void FactsGenerator::handleFunctionCall(const Expr *Call, + const FunctionDecl *FD, + ArrayRef<const Expr *> Args, + bool IsGslConstruction) { + // Ignore functions returning values with no origin. 
+ if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast<CXXMethodDecl>(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } +} + +/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. +/// If so, creates a `TestPointFact` and returns true. +bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact<TestPointFact>(Annotation)); + return true; + } + } + return false; +} + +void FactsGenerator::handleAssignment(const Expr *LHSExpr, + const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. 
+ if (const auto *DRE_LHS = + dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } +} + +// A DeclRefExpr will be treated as a use of the referenced decl. It will be +// checked for use-after-free unless it is later marked as being written to +// (e.g. on the left-hand side of an assignment). +void FactsGenerator::handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact<UseFact>(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } +} + +void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp index e791224..ad61a42 100644 --- a/clang/lib/Analysis/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -13,8 +13,7 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" -namespace clang { -namespace lifetimes { +namespace clang::lifetimes { const FunctionDecl * getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { @@ -71,5 +70,4 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return 
isNormalAssignmentOperator(FD); } -} // namespace lifetimes -} // namespace clang +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp new file mode 100644 index 0000000..00c7ed90 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -0,0 +1,77 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the main LifetimeSafetyAnalysis class, which coordinates +// the various components (fact generation, loan propagation, live origins +// analysis, and checking) to detect lifetime safety violations in C++ code. +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include <memory> + +namespace clang::lifetimes { +namespace internal { + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter) {} + 
+void LifetimeSafetyAnalysis::run() { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + + FactsGenerator FactGen(FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. + LoanPropagation = std::make_unique<LoanPropagationAnalysis>( + Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory); + + LiveOrigins = std::make_unique<LiveOriginsAnalysis>( + Cfg, AC, FactMgr, Factory.LivenessMapFactory); + DEBUG_WITH_TYPE("LiveOrigins", + LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints())); + + runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter); +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); + Analysis.run(); +} +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp new file mode 100644 index 0000000..cddb3f3c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -0,0 +1,180 @@ +//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "Dataflow.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang::lifetimes::internal { +namespace { + +/// The dataflow lattice for origin liveness analysis. +/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct Lattice { + LivenessMap LiveOrigins; + + Lattice() : LiveOrigins(nullptr) {}; + + explicit Lattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const Lattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } + } +}; + +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. 
+class AnalysisImpl + : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward> { + +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward>::transfer; + + StringRef getAnalysisName() const { return "LiveOrigins"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } + + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. 
If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); + } + + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise (a merging flow), the state is returned unchanged. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOriginsAt(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. 
+ void dump(llvm::raw_ostream &OS, + llvm::StringMap<ProgramPoint> TestPoints) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : TestPoints) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } + +private: + FactManager &FactMgr; + LivenessMap::Factory &Factory; +}; +} // namespace + +// PImpl wrapper implementation +class LiveOriginsAnalysis::Impl : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F, + LivenessMap::Factory &SF) + : PImpl(std::make_unique<Impl>(C, AC, F, SF)) { + PImpl->run(); +} + +LiveOriginsAnalysis::~LiveOriginsAnalysis() = default; + +LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const { + return PImpl->getLiveOriginsAt(P); +} + +void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS, + llvm::StringMap<ProgramPoint> TestPoints) const { + PImpl->dump(OS, TestPoints); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp new file mode 100644 index 0000000..387097e --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp @@ -0,0 +1,138 @@ +//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "Dataflow.h" +#include <memory> + +namespace clang::lifetimes::internal { +namespace { +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct Lattice { + /// The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit Lattice(const OriginLoanMap &S) : Origins(S) {} + Lattice() = default; + + bool operator==(const Lattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS) const { + OS << "LoanPropagationLattice State:\n"; + if (Origins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +class AnalysisImpl + : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Forward> { +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "LoanPropagation"; } + + Lattice getInitialState() { 
return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); + return Lattice(JoinedOrigins); + } + + /// A new loan is issued to the origin. Old loans are erased. + Lattice transfer(Lattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return Lattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + } + + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? 
LoanSetFactory.getEmptySet() : getLoans(In, DestOID); + LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + + return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + } + + LoanSet getLoans(OriginID OID, ProgramPoint P) const { + return getLoans(getState(P), OID); + } + +private: + LoanSet getLoans(Lattice L, OriginID OID) const { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return LoanSetFactory.getEmptySet(); + } + + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; +}; +} // namespace + +class LoanPropagationAnalysis::Impl final : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LoanPropagationAnalysis::LoanPropagationAnalysis( + const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : PImpl(std::make_unique<Impl>(C, AC, F, OriginLoanMapFactory, + LoanSetFactory)) { + PImpl->run(); +} + +LoanPropagationAnalysis::~LoanPropagationAnalysis() = default; + +LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const { + return PImpl->getLoans(OID, P); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp new file mode 100644 index 0000000..2c85a3c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Loans.cpp @@ -0,0 +1,18 @@ +//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" + +namespace clang::lifetimes::internal { + +void Loan::dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp new file mode 100644 index 0000000..ea51a75 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -0,0 +1,89 @@ +//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" + +namespace clang::lifetimes::internal { + +void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); +} + +// TODO: Mark this method as const once we remove the call to getOrCreate. 
+OriginID OriginManager::get(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); +} + +OriginID OriginManager::get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. 
+ if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; +} + +OriginID OriginManager::getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; +} + +const Origin &OriginManager::getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; +} + +OriginID OriginManager::getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index d9aafc6..b7e8bad 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -704,6 +704,10 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::Emscripten: return std::make_unique<EmscriptenTargetInfo<WebAssembly32TargetInfo>>( Triple, Opts); + + case llvm::Triple::Linux: + return std::make_unique<WALITargetInfo<WebAssembly32TargetInfo>>(Triple, + Opts); case llvm::Triple::UnknownOS: return std::make_unique<WebAssemblyOSTargetInfo<WebAssembly32TargetInfo>>( Triple, Opts); diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 6c49a09..bd6ffcf 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -948,6 +948,23 @@ public: using WebAssemblyOSTargetInfo<Target>::WebAssemblyOSTargetInfo; }; +// WALI target +template <typename Target> +class LLVM_LIBRARY_VISIBILITY WALITargetInfo + : public WebAssemblyOSTargetInfo<Target> { + void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, + MacroBuilder &Builder) const final { + 
WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder); + // Linux defines; list based off of gcc output + DefineStd(Builder, "unix", Opts); + DefineStd(Builder, "linux", Opts); + Builder.defineMacro("__wali__"); + } + +public: + using WebAssemblyOSTargetInfo<Target>::WebAssemblyOSTargetInfo; +}; + // Emscripten target template <typename Target> class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index eba7422..4de6ce6 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -88,12 +88,23 @@ public: LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; - // size_t being unsigned long for both wasm32 and wasm64 makes mangled names - // more consistent between the two. - SizeType = UnsignedLong; - PtrDiffType = SignedLong; - IntPtrType = SignedLong; HasUnalignedAccess = true; + if (T.isWALI()) { + // The WALI ABI is documented here: + // https://doc.rust-lang.org/rustc/platform-support/wasm32-wali-linux.html + // Currently, this ABI only applies to wasm32 targets and notably requires + // 64-bit longs + LongAlign = LongWidth = 64; + SizeType = UnsignedInt; + PtrDiffType = SignedInt; + IntPtrType = SignedInt; + } else { + // size_t being unsigned long for both wasm32 and wasm64 makes mangled + // names more consistent between the two. 
+ SizeType = UnsignedLong; + PtrDiffType = SignedLong; + IntPtrType = SignedLong; + } } StringRef getABI() const override; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 85a13357..40ea513 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6836,6 +6836,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args); else if (Target.isOHOSFamily()) TC = std::make_unique<toolchains::OHOS>(*this, Target, Args); + else if (Target.isWALI()) + TC = std::make_unique<toolchains::WebAssembly>(*this, Target, Args); else TC = std::make_unique<toolchains::Linux>(*this, Target, Args); break; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index e9ca8ce..9abaf79 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,7 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 51e0ee1..0ebf56e 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -111,6 +111,7 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e8a7ad3..8aebf53 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,7 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include 
"clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 35cdfbf..0d8d0fa 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -17,7 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index f4df63c..9cbd1bd 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -604,6 +604,10 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( return std::nullopt; const NormalizedConstraint::OccurenceList &Used = Constraint.mappingOccurenceList(); + // The empty MLTAL situation should only occur when evaluating non-dependent + // constraints. + if (!MLTAL.getNumSubstitutedLevels()) + MLTAL.addOuterTemplateArguments(TD, {}, /*Final=*/false); SubstitutedOuterMost = llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); unsigned Offset = 0; @@ -623,9 +627,7 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( if (Offset < SubstitutedOuterMost.size()) SubstitutedOuterMost.erase(SubstitutedOuterMost.begin() + Offset); - MLTAL.replaceOutermostTemplateArguments( - const_cast<NamedDecl *>(Constraint.getConstraintDecl()), - SubstitutedOuterMost); + MLTAL.replaceOutermostTemplateArguments(TD, SubstitutedOuterMost); return std::move(MLTAL); } @@ -956,11 +958,20 @@ ExprResult ConstraintSatisfactionChecker::Evaluate( ? 
Constraint.getPackSubstitutionIndex() : PackSubstitutionIndex; - Sema::InstantiatingTemplate _(S, ConceptId->getBeginLoc(), - Sema::InstantiatingTemplate::ConstraintsCheck{}, - ConceptId->getNamedConcept(), - MLTAL.getInnermost(), - Constraint.getSourceRange()); + Sema::InstantiatingTemplate InstTemplate( + S, ConceptId->getBeginLoc(), + Sema::InstantiatingTemplate::ConstraintsCheck{}, + ConceptId->getNamedConcept(), + // We may have empty template arguments when checking non-dependent + // nested constraint expressions. + // In such cases, non-SFINAE errors would have already been diagnosed + // during parameter mapping substitution, so the instantiating template + // arguments are less useful here. + MLTAL.getNumSubstitutedLevels() ? MLTAL.getInnermost() + : ArrayRef<TemplateArgument>{}, + Constraint.getSourceRange()); + if (InstTemplate.isInvalid()) + return ExprError(); unsigned Size = Satisfaction.Details.size(); diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 779b6e9..f3969a9 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2898,15 +2898,6 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe( dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) VarTy = ASE->getElementType(); - llvm::SmallVector<OpenACCReductionRecipe::CombinerRecipe, 1> CombinerRecipes; - - // We use the 'set-ness' of the alloca-decl to determine whether the combiner - // is 'set' or not, so we can skip any attempts at it if we're going to fail - // at any of the combiners. 
- if (CreateReductionCombinerRecipe(VarExpr->getBeginLoc(), ReductionOperator, - VarTy, CombinerRecipes)) - return OpenACCReductionRecipe::Empty(); - VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), &getASTContext().Idents.get("openacc.reduction.init"), VarTy); @@ -2955,163 +2946,5 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe( AllocaDecl->setInit(Init.get()); AllocaDecl->setInitStyle(VarDecl::CallInit); } - - return OpenACCReductionRecipe(AllocaDecl, CombinerRecipes); -} - -bool SemaOpenACC::CreateReductionCombinerRecipe( - SourceLocation Loc, OpenACCReductionOperator ReductionOperator, - QualType VarTy, - llvm::SmallVectorImpl<OpenACCReductionRecipe::CombinerRecipe> - &CombinerRecipes) { - // Now we can try to generate the 'combiner' recipe. This is a little - // complicated in that if the 'VarTy' is an array type, we want to take its - // element type so we can generate that. Additionally, if this is a struct, - // we have two options: If there is overloaded operators, we want to take - // THOSE, else we want to do the individual elements. - - BinaryOperatorKind BinOp; - switch (ReductionOperator) { - case OpenACCReductionOperator::Invalid: - // This can only happen when there is an error, and since these inits - // are used for code generation, we can just ignore/not bother doing any - // initialization here. 
- CombinerRecipes.push_back({nullptr, nullptr, nullptr}); - return false; - case OpenACCReductionOperator::Addition: - BinOp = BinaryOperatorKind::BO_AddAssign; - break; - case OpenACCReductionOperator::Multiplication: - BinOp = BinaryOperatorKind::BO_MulAssign; - break; - case OpenACCReductionOperator::BitwiseAnd: - BinOp = BinaryOperatorKind::BO_AndAssign; - break; - case OpenACCReductionOperator::BitwiseOr: - BinOp = BinaryOperatorKind::BO_OrAssign; - break; - case OpenACCReductionOperator::BitwiseXOr: - BinOp = BinaryOperatorKind::BO_XorAssign; - break; - - case OpenACCReductionOperator::Max: - case OpenACCReductionOperator::Min: - case OpenACCReductionOperator::And: - case OpenACCReductionOperator::Or: - // We just want a 'NYI' error in the backend, so leave an empty combiner - // recipe, and claim success. - CombinerRecipes.push_back({nullptr, nullptr, nullptr}); - return false; - } - - // If VarTy is an array type, at the top level only, we want to do our - // compares/decomp/etc at the element level. - if (auto *AT = getASTContext().getAsArrayType(VarTy)) - VarTy = AT->getElementType(); - - assert(!VarTy->isArrayType() && "Only 1 level of array allowed"); - - auto tryCombiner = [&, this](DeclRefExpr *LHSDRE, DeclRefExpr *RHSDRE, - bool IncludeTrap) { - // TODO: OpenACC: we have to figure out based on the bin-op how to do the - // ones that we can't just use compound operators for. So &&, ||, max, and - // min aren't really clear what we could do here. - if (IncludeTrap) { - // Trap all of the errors here, we'll emit our own at the end. 
- Sema::TentativeAnalysisScope Trap{SemaRef}; - - return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, - RHSDRE, - /*ForFoldExpr=*/false); - } else { - return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, - RHSDRE, - /*ForFoldExpr=*/false); - } - }; - - struct CombinerAttemptTy { - VarDecl *LHS; - DeclRefExpr *LHSDRE; - VarDecl *RHS; - DeclRefExpr *RHSDRE; - Expr *Op; - }; - - auto formCombiner = [&, this](QualType Ty) -> CombinerAttemptTy { - VarDecl *LHSDecl = CreateAllocaDecl( - getASTContext(), SemaRef.getCurContext(), Loc, - &getASTContext().Idents.get("openacc.reduction.combiner.lhs"), Ty); - auto *LHSDRE = DeclRefExpr::Create( - getASTContext(), NestedNameSpecifierLoc{}, SourceLocation{}, LHSDecl, - /*ReferstoEnclosingVariableOrCapture=*/false, - DeclarationNameInfo{DeclarationName{LHSDecl->getDeclName()}, - LHSDecl->getBeginLoc()}, - Ty, clang::VK_LValue, LHSDecl, nullptr, NOUR_None); - VarDecl *RHSDecl = CreateAllocaDecl( - getASTContext(), SemaRef.getCurContext(), Loc, - &getASTContext().Idents.get("openacc.reduction.combiner.lhs"), Ty); - auto *RHSDRE = DeclRefExpr::Create( - getASTContext(), NestedNameSpecifierLoc{}, SourceLocation{}, RHSDecl, - /*ReferstoEnclosingVariableOrCapture=*/false, - DeclarationNameInfo{DeclarationName{RHSDecl->getDeclName()}, - RHSDecl->getBeginLoc()}, - Ty, clang::VK_LValue, RHSDecl, nullptr, NOUR_None); - - ExprResult BinOpResult = tryCombiner(LHSDRE, RHSDRE, /*IncludeTrap=*/true); - - return {LHSDecl, LHSDRE, RHSDecl, RHSDRE, BinOpResult.get()}; - }; - - CombinerAttemptTy TopLevelCombinerInfo = formCombiner(VarTy); - - if (TopLevelCombinerInfo.Op) { - if (!TopLevelCombinerInfo.Op->containsErrors() && - TopLevelCombinerInfo.Op->isInstantiationDependent()) { - // If this is instantiation dependent, we're just going to 'give up' here - // and count on us to get it right during instantaition. 
- CombinerRecipes.push_back({nullptr, nullptr, nullptr}); - return false; - } else if (!TopLevelCombinerInfo.Op->containsErrors()) { - // Else, we succeeded, we can just return this combiner. - CombinerRecipes.push_back({TopLevelCombinerInfo.LHS, - TopLevelCombinerInfo.RHS, - TopLevelCombinerInfo.Op}); - return false; - } - } - - // Since the 'root' level didn't fail, the only thing that could be successful - // is a struct that we decompose on its individual fields. - - RecordDecl *RD = VarTy->getAsRecordDecl(); - if (!RD) { - Diag(Loc, diag::err_acc_reduction_recipe_no_op) << VarTy; - tryCombiner(TopLevelCombinerInfo.LHSDRE, TopLevelCombinerInfo.RHSDRE, - /*IncludeTrap=*/false); - return true; - } - - for (const FieldDecl *FD : RD->fields()) { - CombinerAttemptTy FieldCombinerInfo = formCombiner(FD->getType()); - - if (!FieldCombinerInfo.Op || FieldCombinerInfo.Op->containsErrors()) { - Diag(Loc, diag::err_acc_reduction_recipe_no_op) << FD->getType(); - Diag(FD->getBeginLoc(), diag::note_acc_reduction_recipe_noop_field) << RD; - tryCombiner(FieldCombinerInfo.LHSDRE, FieldCombinerInfo.RHSDRE, - /*IncludeTrap=*/false); - return true; - } - - if (FieldCombinerInfo.Op->isInstantiationDependent()) { - // If this is instantiation dependent, we're just going to 'give up' here - // and count on us to get it right during instantaition. 
- CombinerRecipes.push_back({nullptr, nullptr, nullptr}); - } else { - CombinerRecipes.push_back( - {FieldCombinerInfo.LHS, FieldCombinerInfo.RHS, FieldCombinerInfo.Op}); - } - } - - return false; + return OpenACCReductionRecipe(AllocaDecl, {}); } diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp index 040ddd3..7b74b7c 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp @@ -8,19 +8,12 @@ struct DefaultOperators { bool b; }; -struct DefaultOperatorsNoFloats { - int i; - unsigned int u; - bool b; -}; - template<typename T> void acc_combined() { T someVar; T someVarArr[5]; - struct DefaultOperatorsNoFloats someVarNoFloats; - struct DefaultOperatorsNoFloats someVarArrNoFloats[5]; #pragma acc parallel loop reduction(+:someVar) + for(int i=0;i < 5; ++i); // CHECK: acc.reduction.recipe @reduction_add__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -46,7 +39,6 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(*:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { @@ -129,67 +121,86 @@ void acc_combined() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(&:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe 
@reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel loop reduction(&:someVar) + +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : 
!cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc parallel loop reduction(|:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <ior> init 
{ +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc parallel loop reduction(^:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVar) @@ -595,152 +606,194 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc parallel loop reduction(&:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: 
%[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[DECAY]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[DECAY]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ALL_ONES_IDX:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = 
cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride 
%[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // // CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : 
(!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: 
%[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const 
#cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(|:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : 
!cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +#pragma acc parallel loop reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] 
= cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[TEMP_LOAD]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : 
(!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc parallel loop reduction(^:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : 
!cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[TEMP_LOAD]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store 
{{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr) @@ -1082,10 +1135,10 @@ void acc_combined() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(&:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel loop reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1099,15 +1152,21 @@ void acc_combined() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = 
cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : 
!cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1120,14 +1179,14 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(|:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel loop reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: 
!acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1141,15 +1200,21 @@ void acc_combined() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} 
%[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1162,14 +1227,14 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(^:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe 
@reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel loop reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1183,15 +1248,21 @@ void acc_combined() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> 
!cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1204,8 +1275,8 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// 
CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr[2]) @@ -1313,11 +1384,11 @@ void acc_combined() { for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(min:someVarArr[1:1]) for(int i=0;i < 5; ++i); -#pragma acc parallel loop reduction(&:someVarArrNoFloats[1:1]) - for(int i = 0; i < 5; ++i); -#pragma acc parallel loop reduction(|:someVarArrNoFloats[1:1]) - for(int i = 0; i < 5; ++i); -#pragma acc parallel loop reduction(^:someVarArrNoFloats[1:1]) +#pragma acc parallel loop reduction(&:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(|:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVarArr[1:1]) for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr[1:1]) for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp index 6e885cc..001c2fc 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp @@ -57,6 +57,47 @@ void acc_combined() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(&:someVar) +// CHECK-NEXT: acc.reduction.recipe 
@reduction_iand__ZTSf : !cir.ptr<!cir.float> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSf : !cir.ptr<!cir.float> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSf : !cir.ptr<!cir.float> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: 
cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSf : !cir.ptr<!cir.float> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) @@ -208,6 +249,97 @@ void acc_combined() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ONE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], 
%[[TWO_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> 
!cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 
5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc parallel 
loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) @@ -410,6 +542,111 @@ void acc_combined() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : 
(!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: 
%[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// 
CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr[2]) // CHECK-NEXT: 
acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -489,6 +726,12 @@ void acc_combined() { for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(min:someVarArr[1:1]) for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(&:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(|:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc parallel loop reduction(^:someVarArr[1:1]) + for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(&&:someVarArr[1:1]) for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(||:someVarArr[1:1]) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c index c99dc09..f70113e 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c @@ -8,17 +8,9 @@ struct DefaultOperators { bool b; }; -struct DefaultOperatorsNoFloats { - int i; - unsigned int u; - bool b; -}; - void acc_compute() { struct DefaultOperators someVar; struct DefaultOperators someVarArr[5]; - struct DefaultOperatorsNoFloats someVarNoFloats; - struct DefaultOperatorsNoFloats someVarArrNoFloats[5]; #pragma acc parallel reduction(+:someVar) // CHECK: acc.reduction.recipe @reduction_add__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) @@ -127,67 +119,85 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(&:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: 
%[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = 
cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: 
%[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVar) @@ -582,136 +592,166 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: 
^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> 
!cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[DECAY]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[DECAY]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ALL_ONES_IDX:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // 
CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// 
CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // // CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// 
CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] 
= cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> 
// // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +#pragma acc parallel reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca 
!cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.zero : !rec_DefaultOperatorsNoFloats +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.zero : !rec_DefaultOperators // CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[TEMP_LOAD]] // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> 
!cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// 
CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +#pragma acc parallel reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.zero : 
!rec_DefaultOperatorsNoFloats +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.zero : !rec_DefaultOperators // CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[TEMP_LOAD]] // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVarArr) @@ -1039,10 +1079,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1056,16 +1096,21 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i - -// CHECK-NEXT: 
%[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : 
!cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1078,14 +1123,14 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_16DefaultOperators : 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1099,15 +1144,21 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = 
cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1120,14 +1171,14 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // 
CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1141,15 +1192,21 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast 
array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1162,8 +1219,8 @@ void 
acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVarArr[2]) @@ -1271,11 +1328,11 @@ void acc_compute() { ; #pragma acc parallel reduction(min:someVarArr[1:1]) ; -#pragma acc parallel reduction(&:someVarArrNoFloats[1:1]) +#pragma acc parallel reduction(&:someVarArr[1:1]) ; -#pragma acc parallel reduction(|:someVarArrNoFloats[1:1]) +#pragma acc parallel reduction(|:someVarArr[1:1]) ; -#pragma acc parallel reduction(^:someVarArrNoFloats[1:1]) +#pragma acc parallel reduction(^:someVarArr[1:1]) ; #pragma acc parallel reduction(&&:someVarArr[1:1]) ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp index b90a2fc..6a987c2 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp @@ -8,18 +8,10 @@ struct DefaultOperators { bool b; }; -struct DefaultOperatorsNoFloats { - int i; - unsigned int u; - bool b; -}; - template<typename T> void acc_compute() { T someVar; T someVarArr[5]; - struct DefaultOperatorsNoFloats someVarNoFloats; - struct DefaultOperatorsNoFloats someVarArrNoFloats[5]; #pragma acc parallel reduction(+:someVar) ; // CHECK: acc.reduction.recipe 
@reduction_add__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <add> init { @@ -129,67 +121,86 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(&:someVar) + +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member 
%[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> 
!cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member 
%[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVar) @@ -596,151 +607,193 @@ void 
acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc parallel reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = 
cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[DECAY]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[DECAY]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ALL_ONES_IDX:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const 
#cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], 
%[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // // CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : 
(!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: 
%[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const 
#cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : 
!cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +#pragma acc parallel reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = 
cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[TEMP_LOAD]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : 
(!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, 
["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +#pragma acc parallel reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: 
%[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[TEMP_LOAD]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ONE:.*]] = cir.const 
#cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVarArr) @@ 
-1082,10 +1135,10 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(&:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1099,15 +1152,21 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = 
cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // 
CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1120,14 +1179,14 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(|:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: 
cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1141,15 +1200,21 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: 
%[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1162,14 +1227,14 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; -#pragma acc parallel reduction(^:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) 
-// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc parallel reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1183,15 +1248,21 @@ void acc_compute() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // 
CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1204,8 +1275,8 @@ void acc_compute() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// 
CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } ; #pragma acc parallel reduction(&&:someVarArr[2]) @@ -1313,11 +1384,11 @@ void acc_compute() { ; #pragma acc parallel reduction(min:someVarArr[1:1]) ; -#pragma acc parallel reduction(&:someVarArrNoFloats[1:1]) +#pragma acc parallel reduction(&:someVarArr[1:1]) ; -#pragma acc parallel reduction(|:someVarArrNoFloats[1:1]) +#pragma acc parallel reduction(|:someVarArr[1:1]) ; -#pragma acc parallel reduction(^:someVarArrNoFloats[1:1]) +#pragma acc parallel reduction(^:someVarArr[1:1]) ; #pragma acc parallel reduction(&&:someVarArr[1:1]) ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c index 0f7fd84..1b969ac 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c @@ -56,6 +56,48 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; +#pragma acc parallel reduction(&:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSf : !cir.ptr<!cir.float> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation 
here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(|:someVar) + +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSf : !cir.ptr<!cir.float> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSf : !cir.ptr<!cir.float> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + ; #pragma acc parallel reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSf : !cir.ptr<!cir.float> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) @@ -207,6 +249,97 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; +#pragma acc parallel reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ONE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], 
%[[FOUR_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], 
%[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: 
%[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; #pragma acc parallel reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) @@ -409,6 +542,111 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; +#pragma acc parallel reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// 
CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 
5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, 
%[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> 
!cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; #pragma acc parallel reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -488,6 +726,12 @@ void acc_compute() { ; #pragma acc parallel reduction(min:someVarArr[1:1]) ; +#pragma acc parallel reduction(&:someVarArr[1:1]) + ; +#pragma acc parallel reduction(|:someVarArr[1:1]) + ; +#pragma acc parallel reduction(^:someVarArr[1:1]) + ; #pragma acc parallel reduction(&&:someVarArr[1:1]) ; #pragma acc parallel reduction(||:someVarArr[1:1]) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp index 4d99a43..b406bd5 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp @@ -58,6 +58,47 @@ 
void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: } ; +#pragma acc parallel reduction(&:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSf : !cir.ptr<!cir.float> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSf : !cir.ptr<!cir.float> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSf : !cir.ptr<!cir.float> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, 
!cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + ; #pragma acc parallel reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSf : !cir.ptr<!cir.float> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) @@ -209,6 +250,97 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; +#pragma acc parallel reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ONE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const 
#cir.int<2> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay 
%[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca 
!cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; #pragma acc parallel 
reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) @@ -411,6 +543,111 @@ void acc_compute() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } ; +#pragma acc parallel reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> 
!cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// 
CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; +#pragma acc parallel reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// 
CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + ; #pragma acc parallel reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe @reduction_land__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // 
CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -490,6 +727,12 @@ void acc_compute() { ; #pragma acc parallel reduction(min:someVarArr[1:1]) ; +#pragma acc parallel reduction(&:someVarArr[1:1]) + ; +#pragma acc parallel reduction(|:someVarArr[1:1]) + ; +#pragma acc parallel reduction(^:someVarArr[1:1]) + ; #pragma acc parallel reduction(&&:someVarArr[1:1]) ; #pragma acc parallel reduction(||:someVarArr[1:1]) diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp index 038afcaa..750c7b4 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp @@ -8,19 +8,12 @@ struct DefaultOperators { bool b; }; -struct DefaultOperatorsNoFloats { - int i; - unsigned int u; - bool b; -}; - template<typename T> -void acc_combined() { +void acc_loop() { T someVar; T someVarArr[5]; - struct DefaultOperatorsNoFloats someVarNoFloats; - struct DefaultOperatorsNoFloats someVarArrNoFloats[5]; #pragma acc loop reduction(+:someVar) + for(int i=0;i < 5; ++i); // CHECK: acc.reduction.recipe @reduction_add__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <add> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] @@ -46,7 +39,6 @@ void acc_combined() { // TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i=0;i < 5; ++i); #pragma acc loop reduction(*:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <mul> init { @@ -129,67 +121,86 @@ void acc_combined() { // CHECK-NEXT: 
acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc loop reduction(&:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +#pragma acc loop reduction(&:someVar) + +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : 
!cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc loop reduction(|:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> + for(int i=0;i < 5; ++i); 
+#pragma acc loop reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} 
%[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc loop reduction(^:someVarNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS24DefaultOperatorsNoFloats : !cir.ptr<!rec_DefaultOperatorsNoFloats> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperatorsNoFloats, !cir.ptr<!rec_DefaultOperatorsNoFloats>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTS16DefaultOperators : !cir.ptr<!rec_DefaultOperators> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_DefaultOperators>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_DefaultOperators, !cir.ptr<!rec_DefaultOperators>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[ALLOCA]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[ALLOCA]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[ALLOCA]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[ALLOCA]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperatorsNoFloats> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!rec_DefaultOperators> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVar) @@ -595,152 +606,194 @@ void acc_combined() { // 
TODO OpenACC: Expecting combination operation here // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc loop reduction(&:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[DECAY]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[DECAY]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[DECAY]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[DECAY]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ALL_ONES_IDX:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ALL_ONES_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : 
!cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // // CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const 
#cir.int<3> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member 
%[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i -// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[NEXT_ELT]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[NEXT_ELT]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[NEXT_ELT]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[NEXT_ELT]][3] {name = "d"} : 
!cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[NEXT_ELT]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc loop reduction(|:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +#pragma acc loop reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] 
{name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[TEMP_LOAD]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, 
!cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } - for(int i = 0; i < 5; ++i); -#pragma acc loop reduction(^:someVarArrNoFloats) -// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 
5>>{{.*}}) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init", init] -// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, ["arrayinit.temp"] -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i -// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> // CHECK-NEXT: cir.do { 
-// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[TEMP_LOAD]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[TEMP_LOAD]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[TEMP_LOAD]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[TEMP_LOAD]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[TEMP_LOAD]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // 
CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i -// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !s64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>> +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!rec_DefaultOperators>, !s64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>> // CHECK-NEXT: cir.yield // CHECK-NEXT: } while { -// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperatorsNoFloats>>, !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperatorsNoFloats>, !cir.bool +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!rec_DefaultOperators>, !cir.bool // CHECK-NEXT: cir.condition(%[[CMP]]) // CHECK-NEXT: } // CHECK-NEXT: acc.yield // // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}) +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}) // TODO OpenACC: Expecting combination operation here -// CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr) @@ -1082,10 +1135,10 @@ void acc_combined() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc loop reduction(&:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <iand> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc loop reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1099,15 +1152,21 @@ void acc_combined() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> 
!cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xFF{{.*}}> : 
!cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #true // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1120,14 +1179,14 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc loop reduction(|:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <ior> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc loop reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: 
!cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1141,15 +1200,21 @@ void acc_combined() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] 
= cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1162,14 +1227,14 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); -#pragma acc loop reduction(^:someVarArrNoFloats[2]) -// CHECK-NEXT: acc.reduction.recipe 
@reduction_xor__Bcnt1__ZTSA5_24DefaultOperatorsNoFloats : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> reduction_operator <xor> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperatorsNoFloats x 5>, !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>>, ["openacc.reduction.init"] +#pragma acc loop reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"] // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index // CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i @@ -1183,15 +1248,21 @@ void acc_combined() { // CHECK-NEXT: cir.condition(%[[COND]]) // CHECK-NEXT: } body { // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i -// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperatorsNoFloats>, !u64i) -> !cir.ptr<!rec_DefaultOperatorsNoFloats> -// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators> 
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!rec_DefaultOperators>, !u64i) -> !cir.ptr<!rec_DefaultOperators> +// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i> -// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!u32i> +// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i> -// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][2] {name = "b"} : !cir.ptr<!rec_DefaultOperatorsNoFloats> -> !cir.ptr<!cir.bool> +// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double> +// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool> // CHECK-NEXT: %[[ZERO:.*]] = cir.const #false // CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool> // CHECK-NEXT: cir.yield @@ -1204,8 +1275,8 @@ void acc_combined() { // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { -// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperatorsNoFloats x 5>> +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[2]) @@ -1313,19 +1384,19 @@ void acc_combined() { for(int i=0;i < 5; ++i); #pragma acc loop reduction(min:someVarArr[1:1]) for(int i=0;i < 5; ++i); -#pragma acc loop reduction(&:someVarArrNoFloats[1:1]) - for(int i = 0; i < 5; ++i); -#pragma acc loop reduction(|:someVarArrNoFloats[1:1]) - for(int i = 0; i < 5; ++i); -#pragma acc loop reduction(^:someVarArrNoFloats[1:1]) +#pragma acc loop reduction(&:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(|:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVarArr[1:1]) for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[1:1]) for(int i=0;i < 5; ++i); #pragma acc loop reduction(||:someVarArr[1:1]) for(int i=0;i < 5; ++i); - // CHECK-NEXT: cir.func {{.*}}@_Z12acc_combined + // CHECK-NEXT: cir.func {{.*}}@_Z8acc_loop } void uses() { - acc_combined<DefaultOperators>(); + acc_loop<DefaultOperators>(); } diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp index 11b7c35..52406fd 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp @@ -58,6 +58,47 @@ void acc_loop() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> // CHECK-NEXT: 
} for(int i=0;i < 5; ++i); +#pragma acc loop reduction(&:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSf : !cir.ptr<!cir.float> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(|:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSf : !cir.ptr<!cir.float> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVar) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSf : !cir.ptr<!cir.float> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.reduction.init", init] +// 
CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[ALLOCA]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.float> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.float> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVar) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSf : !cir.ptr<!cir.float> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float>{{.*}}) @@ -209,6 +250,97 @@ void acc_loop() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); +#pragma acc loop reduction(&:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[ONE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[TWO_IDX:.*]] = cir.const #cir.int<2> : !s64i +// 
CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[THREE_IDX:.*]] = cir.const #cir.int<3> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[THREE_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FOUR_IDX:.*]] = cir.const #cir.int<4> : !s64i +// CHECK-NEXT: %[[NEXT_ELT:.*]] = cir.ptr_stride %[[DECAY]], %[[FOUR_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[NEXT_ELT]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(|:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay 
%[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVarArr) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) +// CHECK-NEXT: 
%[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init] +// CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"] +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride %[[DECAY]], %[[LAST_IDX]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[TEMP_LOAD]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %[[NEXT_ITEM:.*]] = cir.ptr_stride %[[TEMP_LOAD]], %[[ONE]] : (!cir.ptr<!cir.float>, !s64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: cir.store {{.*}} %[[NEXT_ITEM]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[TEMP_LOAD:.*]] = cir.load {{.*}} %[[TEMP_ITR]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[TEMP_LOAD]], %[[END_ITR]]) : !cir.ptr<!cir.float>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}) +// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } 
+ for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr) // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}) @@ -411,6 +543,111 @@ void acc_loop() { // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> // CHECK-NEXT: } for(int i=0;i < 5; ++i); +#pragma acc loop reduction(&:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_iand__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], 
%[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(|:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_ior__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <ior> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// 
CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVarArr[2]) +// CHECK-NEXT: acc.reduction.recipe @reduction_xor__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <xor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init"] +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// 
CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ITR_LOAD]] : (!cir.ptr<!cir.float>, !u64i) -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[STRIDE]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } combiner { +// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!cir.float x 5>> +// CHECK-NEXT: } + for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[2]) // CHECK-NEXT: acc.reduction.recipe 
@reduction_land__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) @@ -490,6 +727,12 @@ void acc_loop() { for(int i=0;i < 5; ++i); #pragma acc loop reduction(min:someVarArr[1:1]) for(int i=0;i < 5; ++i); +#pragma acc loop reduction(&:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(|:someVarArr[1:1]) + for(int i=0;i < 5; ++i); +#pragma acc loop reduction(^:someVarArr[1:1]) + for(int i=0;i < 5; ++i); #pragma acc loop reduction(&&:someVarArr[1:1]) for(int i=0;i < 5; ++i); #pragma acc loop reduction(||:someVarArr[1:1]) diff --git a/clang/test/DebugInfo/KeyInstructions/flag.cpp b/clang/test/DebugInfo/KeyInstructions/flag.cpp index a5cd855..6aeeed6 100644 --- a/clang/test/DebugInfo/KeyInstructions/flag.cpp +++ b/clang/test/DebugInfo/KeyInstructions/flag.cpp @@ -1,12 +1,15 @@ // RUN: %clang -### -target x86_64 -c -gdwarf -gkey-instructions %s 2>&1 | FileCheck %s --check-prefixes=KEY-INSTRUCTIONS // RUN: %clang -### -target x86_64 -c -gdwarf -gno-key-instructions %s 2>&1 | FileCheck %s --check-prefixes=NO-KEY-INSTRUCTIONS +// RUN: %clang -### -target x86_64 -c -gno-key-instructions %s 2>&1 | FileCheck %s --check-prefixes=NO-DEBUG //// Help. // RUN %clang --help | FileCheck %s --check-prefix=HELP -// HELP: -gkey-instructions Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only. Implies -g. +// HELP: -gkey-instructions Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only. // KEY-INSTRUCTIONS: "-gkey-instructions" // NO-KEY-INSTRUCTIONS-NOT: key-instructions +// NO-DEBUG-NOT: debug-info-kind +// NO-DEBUG-NOT: dwarf //// Help hidden: flag should not be visible. 
// RUN: %clang --help | FileCheck %s --check-prefix=HELP diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c index 73f2f40..45ac450 100644 --- a/clang/test/Driver/debug-options.c +++ b/clang/test/Driver/debug-options.c @@ -268,11 +268,11 @@ // RUN: %clang -### -c %s 2>&1 | FileCheck -check-prefix=NORNGBSE %s // RUN: %clang -### -c -fdebug-ranges-base-address -fno-debug-ranges-base-address %s 2>&1 | FileCheck -check-prefix=NORNGBSE %s // -// RUN: %clang -### -c -gomit-unreferenced-methods -fno-standalone-debug %s 2>&1 | FileCheck -check-prefix=INCTYPES %s +// RUN: %clang -### -c -g -gomit-unreferenced-methods -fno-standalone-debug %s 2>&1 | FileCheck -check-prefix=INCTYPES %s // RUN: %clang -### -c %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s -// RUN: %clang -### -c -gomit-unreferenced-methods -fdebug-types-section -target x86_64-unknown-linux %s 2>&1 \ +// RUN: %clang -### -c -g -gomit-unreferenced-methods -fdebug-types-section -target x86_64-unknown-linux %s 2>&1 \ // RUN: | FileCheck -check-prefix=NOINCTYPES %s -// RUN: %clang -### -c -gomit-unreferenced-methods -fstandalone-debug %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s +// RUN: %clang -### -c -g -gomit-unreferenced-methods -fstandalone-debug %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s // // RUN: %clang -### -c -glldb %s 2>&1 | FileCheck -check-prefix=NOPUB %s // RUN: %clang -### -c -glldb -gno-pubnames %s 2>&1 | FileCheck -check-prefix=NOPUB %s diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 91803fe..29a94ae 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -296,3 +296,10 @@ // RUN: | FileCheck -check-prefix=LINK_WASIP2_USE_WASMLD %s // LINK_WASIP2_USE_WASMLD: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_WASIP2_USE_WASMLD: wasm-ld{{.*}}" "-m" "wasm32" {{.*}} "[[temp]]" {{.*}} + +// Basic `wasm32-linux-muslwali` compile-link test. 
+ +// RUN: %clang -### --target=wasm32-linux-muslwali --sysroot=/foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=LINK_WALI_BASIC %s +// LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" +// LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp index ba1c55b..d7ff76c 100644 --- a/clang/test/Driver/wasm-toolchain.cpp +++ b/clang/test/Driver/wasm-toolchain.cpp @@ -86,3 +86,28 @@ // COMPILE_STDCXX: "-internal-isystem" "[[RESOURCE_DIR]]{{(/|\\\\)}}include" // COMPILE_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-wasi" // COMPILE_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include" + +// RUN: %clangxx -### --target=wasm32-linux-muslwali --stdlib=libc++ %s 2>&1 \ +// RUN: --sysroot=%S/Inputs/basic_linux_libcxx_tree/usr \ +// RUN: | FileCheck -check-prefix=COMPILE_WALI %s +// COMPILE_WALI: "-cc1" +// COMPILE_WALI: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]" +// COMPILE_WALI: "-isysroot" "[[SYSROOT:[^"]+]]" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-linux-muslwali/c++/v1" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/v1" +// COMPILE_WALI: "-internal-isystem" "[[RESOURCE_DIR]]{{(/|\\\\)}}include" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-linux-muslwali" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include" + +// RUN: %clangxx -### --target=wasm32-linux-muslwali --stdlib=libstdc++ %s 2>&1 \ +// RUN: --sysroot=%S/Inputs/basic_linux_libstdcxx_libcxxv2_tree/usr \ +// RUN: | FileCheck -check-prefix=COMPILE_WALI_STDCXX %s +// COMPILE_WALI_STDCXX: "-cc1" +// COMPILE_WALI_STDCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]" +// COMPILE_WALI_STDCXX: "-isysroot" "[[SYSROOT:[^"]+]]" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8/wasm32-linux-muslwali" +// 
COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8/backward" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[RESOURCE_DIR]]{{(/|\\\\)}}include" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-linux-muslwali" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include" diff --git a/clang/test/InstallAPI/project-header-only-args-visibility.test b/clang/test/InstallAPI/project-header-only-args-visibility.test new file mode 100644 index 0000000..0403487 --- /dev/null +++ b/clang/test/InstallAPI/project-header-only-args-visibility.test @@ -0,0 +1,69 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json + +; RUN: clang-installapi \ +; RUN: -target arm64-apple-macos26 -install_name @rpath/libfoo.dylib \ +; RUN: -current_version 1 -compatibility_version 1 \ +; RUN: -Xproject -fvisibility=hidden -I%t/usr/include \ +; RUN: -I%t -dynamiclib %t/inputs.json \ +; RUN: -o %t/output.tbd 2>&1 | FileCheck %s --allow-empty +; RUN: llvm-readtapi --compare %t/output.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty + +; CHECK-NOT: error +; CHECK-NOT: warning + +//--- usr/include/public.h +int foo(void); + +//--- project.h +int bar(void); + +//--- expected.tbd +{ + "main_library": { + "exported_symbols": [ + { + "text": { + "global": [ + "_foo" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "@rpath/libfoo.dylib" + } + ], + "target_info": [ + { + "min_deployment": "26", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- inputs.json.in +{ + "headers": [ + { + "path" : "DSTROOT/usr/include/public.h", + "type" : "public" + }, + { + "path" : "DSTROOT/project.h", + "type" : "project" + } + ], + "version": "3" +} diff --git 
a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp index 1b50336..72d7e6b 100644 --- a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp @@ -2,7 +2,7 @@ struct CompositeOfScalars { int I; - float F; // #COS_FLOAT + float F; short J; char C; double D; @@ -30,11 +30,6 @@ void uses(unsigned Parm) { for(int i = 0; i < 5; ++i); #pragma acc serial loop reduction(&: CoS, I, F) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} for(int i = 0; i < 5; ++i); #pragma acc kernels loop reduction(min: CoS, Array[I], Array[0:I]) diff --git a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp index c4ff12d..babff53 100644 --- a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp +++ b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp @@ -86,6 +86,15 @@ void NormalFunc(int i, float f) { // CHECK-NEXT: CXXBoolLiteralExpr // CHECK-NEXT: NullStmt +#pragma acc serial reduction(|: f) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial + // CHECK-NEXT: reduction clause Operator: | + // CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + #pragma acc parallel reduction(^: i) while(true); @@ -274,6 +283,16 @@ void TemplFunc() { // CHECK-NEXT: CXXBoolLiteralExpr // 
CHECK-NEXT: NullStmt +#pragma acc parallel reduction(&: T::SomeFloat) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: reduction clause Operator: & + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + #pragma acc serial reduction(|: i) while(true); // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial @@ -456,6 +475,14 @@ void TemplFunc() { // CHECK-NEXT: CXXBoolLiteralExpr // CHECK-NEXT: NullStmt + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: reduction clause Operator: & + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial // CHECK-NEXT: reduction clause Operator: | // CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int' diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c index 96c01d0..265c498 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c @@ -2,7 +2,7 @@ struct CompositeOfScalars { int I; - float F; // #COS_FLOAT + float F; short J; char C; double D; @@ -58,11 +58,6 @@ void uses(unsigned Parm) { // Vars in a reduction must be a scalar or a composite of scalars. 
#pragma acc parallel reduction(&: CoS, I, F) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -74,18 +69,12 @@ void uses(unsigned Parm) { while (1); #pragma acc parallel reduction(&: CoS, Array[I], Array[0:I]) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} while (1); struct CompositeHasComposite ChCArray[5]; - // expected-error@+6{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} - // expected-note@+4{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} - // expected-error@+3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // 
expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@+1{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(&: CoS, Array[I], ChCArray[0:I]) while (1); diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp index e3a487a..edc67ce 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp @@ -2,7 +2,7 @@ struct CompositeOfScalars { int I; - float F; // #COS_FLOAT + float F; short J; char C; double D; @@ -61,11 +61,6 @@ void uses(unsigned Parm) { // Vars in a reduction must be a scalar or a composite of scalars. #pragma acc parallel reduction(&: CoS, I, F) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -76,9 +71,6 @@ void uses(unsigned Parm) { while (1); #pragma acc parallel reduction(&: CoS, Array[I], Array[0:I]) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid 
operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} @@ -219,9 +211,6 @@ void TemplUses(T Parm, U CoS, V ChC) { // Vars in a reduction must be a scalar or a composite of scalars. #pragma acc parallel reduction(&: CoS, Var, Parm) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -234,9 +223,6 @@ void TemplUses(T Parm, U CoS, V ChC) { while (1); #pragma acc parallel reduction(&: CoS, Array[Var], Array[0:Var]) - // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} diff --git a/clang/test/SemaOpenACC/loop-construct-reduction-ast.cpp b/clang/test/SemaOpenACC/loop-construct-reduction-ast.cpp index 2daa56b4..4afb370 100644 --- a/clang/test/SemaOpenACC/loop-construct-reduction-ast.cpp +++ b/clang/test/SemaOpenACC/loop-construct-reduction-ast.cpp @@ -102,6 +102,24 @@ void NormalFunc(int i, float f) { // 
CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' // CHECK-NEXT: NullStmt +#pragma acc loop reduction(|: f) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan> + // CHECK-NEXT: reduction clause Operator: | + // CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float' + // CHECK-NEXT: ForStmt + // CHECK-NEXT: DeclStmt + // CHECK-NEXT: VarDecl{{.*}} i 'int' + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 + // CHECK-NEXT: <<<NULL>>> + // CHECK-NEXT: BinaryOperator{{.*}}'<' + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 5 + // CHECK-NEXT: UnaryOperator{{.*}}++ + // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' + // CHECK-NEXT: NullStmt + #pragma acc loop reduction(^: i) for(int i = 0; i < 5; ++i); @@ -249,6 +267,25 @@ void TemplFunc() { // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' // CHECK-NEXT: NullStmt +#pragma acc loop reduction(&: T::SomeFloat) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan> + // CHECK-NEXT: reduction clause Operator: & + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: ForStmt + // CHECK-NEXT: DeclStmt + // CHECK-NEXT: VarDecl{{.*}} i 'int' + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 + // CHECK-NEXT: <<<NULL>>> + // CHECK-NEXT: BinaryOperator{{.*}}'<' + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 5 + // CHECK-NEXT: UnaryOperator{{.*}}++ + // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' + // CHECK-NEXT: NullStmt + #pragma acc loop reduction(|: i) for(int i = 0; i < 5; ++i); // CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan> @@ -403,6 +440,23 @@ void TemplFunc() { // CHECK-NEXT: NullStmt // // CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan> + // CHECK-NEXT: reduction clause Operator: & + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 
'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // CHECK-NEXT: ForStmt + // CHECK-NEXT: DeclStmt + // CHECK-NEXT: VarDecl{{.*}} i 'int' + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 + // CHECK-NEXT: <<<NULL>>> + // CHECK-NEXT: BinaryOperator{{.*}}'<' + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 5 + // CHECK-NEXT: UnaryOperator{{.*}}++ + // CHECK-NEXT: DeclRefExpr{{.*}}'i' 'int' + // CHECK-NEXT: NullStmt + // + // CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan> // CHECK-NEXT: reduction clause Operator: | // CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int' // CHECK-NEXT: ForStmt diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index 768af09..3fbe7c0 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -1404,6 +1404,18 @@ static_assert(!std::is_constructible_v<span<4>, array<int, 3>>); } +namespace case7 { + +template <class _Tp, class _Up> +concept __same_as_impl = __is_same(_Tp, _Up); +template <class _Tp, class _Up> +concept same_as = __same_as_impl<_Tp, _Up>; +template <typename> +concept IsEntitySpec = + requires { requires same_as<void, void>; }; + +} + } namespace GH162125 { @@ -1476,3 +1488,20 @@ static_assert( requires {{ &f } -> C;} ); // expected-error {{reference to overl // expected-error@-1 {{static assertion failed due to requirement 'requires { { &f() } -> C; }'}} } + +namespace GH162770 { + enum e {}; + template<e> struct s {}; + + template<typename> struct specialized; + template<e x> struct specialized<s<x>> { + static auto make(auto) -> s<x>; + }; + + template<e x> struct check { + static constexpr auto m = requires { specialized<s<x>>::make(0); }; + }; + + template<typename... 
Ts> auto comma = (..., Ts()); + auto b = comma<check<e{}>>; +} // namespace GH162770 diff --git a/clang/test/SemaTemplate/partial-spec-instantiate.cpp b/clang/test/SemaTemplate/partial-spec-instantiate.cpp index 0b84df6..44b5800 100644 --- a/clang/test/SemaTemplate/partial-spec-instantiate.cpp +++ b/clang/test/SemaTemplate/partial-spec-instantiate.cpp @@ -152,3 +152,16 @@ namespace GH60778 { ClassTemplate<>::Nested<int> instantiation; } } +#if __cplusplus >= 201103L +namespace GH162855 { + template <class...> using A = int; + template <class, int> struct B; + template <class...> struct C; + template <template <class, int...> class TT, long... X> + struct C<TT<int, X...>> { + template <class... Y> using l = A<B<Y, X>...>; + }; + template <class> struct D; + template struct C<D<int>>; +} // namespace GH162855 +#endif diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 169b2d2..0c05184 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" @@ -63,7 +63,7 @@ public: Analysis = std::make_unique<LifetimeSafetyAnalysis>(*AnalysisCtx, nullptr); Analysis->run(); - AnnotationToPointMap = Analysis->getTestPoints(); + AnnotationToPointMap = Analysis->getFactManager().getTestPoints(); } LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } @@ -98,10 +98,11 @@ public: auto *VD = findDecl<ValueDecl>(VarName); if (!VD) return std::nullopt; - auto OID = Analysis.getOriginIDForDecl(VD); - if (!OID) - ADD_FAILURE() << "Origin for '" << VarName << "' not found."; - return OID; + // This assumes the OriginManager's 
`get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` + // logic in a const-query context if that becomes an issue. + return const_cast<OriginManager &>(Analysis.getFactManager().getOriginMgr()) + .get(*VD); } std::vector<LoanID> getLoansForVar(llvm::StringRef VarName) { @@ -110,7 +111,10 @@ public: ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; return {}; } - std::vector<LoanID> LID = Analysis.getLoanIDForVar(VD); + std::vector<LoanID> LID; + for (const Loan &L : Analysis.getFactManager().getLoanMgr().getLoans()) + if (L.Path.D == VD) + LID.push_back(L.ID); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; return {}; @@ -123,7 +127,7 @@ public: ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLoansAtPoint(OID, PP); + return Analysis.getLoanPropagation().getLoans(OID, PP); } std::optional<std::vector<std::pair<OriginID, LivenessKind>>> @@ -131,7 +135,10 @@ public: ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLiveOriginsAtPoint(PP); + std::vector<std::pair<OriginID, LivenessKind>> Result; + for (auto &[OID, Info] : Analysis.getLiveOrigins().getLiveOriginsAt(PP)) + Result.push_back({OID, Info.Kind}); + return Result; } private: diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 320f913..0e3c9aa2 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -208,6 +208,9 @@ struct IntrinsicLibrary { fir::ExtendedValue genAssociated(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genAtand(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>); + void 
genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genBesselJn(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genBesselYn(mlir::Type, @@ -271,6 +274,7 @@ struct IntrinsicLibrary { llvm::ArrayRef<fir::ExtendedValue>); template <Extremum, ExtremumBehavior> mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>); + void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>); mlir::Value genFraction(mlir::Type resultType, mlir::ArrayRef<mlir::Value> args); @@ -453,6 +457,8 @@ struct IntrinsicLibrary { mlir::Value genTand(mlir::Type, llvm::ArrayRef<mlir::Value>); mlir::Value genTanpi(mlir::Type, llvm::ArrayRef<mlir::Value>); mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>); + void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>); fir::ExtendedValue genTransfer(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index bd94651..444f274 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3383,7 +3383,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter, } } - switch (llvm::omp::Directive dir = item->id) { + llvm::omp::Directive dir = item->id; + switch (dir) { case llvm::omp::Directive::OMPD_barrier: newOp = genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index de7694f..7c5c5fb 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -346,6 +346,18 @@ static constexpr IntrinsicHandler handlers[]{ &I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>, {{{"mask", asValue}, {"pred", asValue}}}, 
/*isElemental=*/false}, + {"barrier_arrive", + &I::genBarrierArrive, + {{{"barrier", asAddr}}}, + /*isElemental=*/false}, + {"barrier_arrive_cnt", + &I::genBarrierArriveCnt, + {{{"barrier", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"barrier_init", + &I::genBarrierInit, + {{{"barrier", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, {"bessel_jn", &I::genBesselJn, {{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}}, @@ -490,6 +502,10 @@ static constexpr IntrinsicHandler handlers[]{ &I::genExtendsTypeOf, {{{"a", asBox}, {"mold", asBox}}}, /*isElemental=*/false}, + {"fence_proxy_async", + &I::genFenceProxyAsync, + {}, + /*isElemental=*/false}, {"findloc", &I::genFindloc, {{{"array", asBox}, @@ -1000,6 +1016,14 @@ static constexpr IntrinsicHandler handlers[]{ {"threadfence_block", &I::genThreadFenceBlock, {}, /*isElemental=*/false}, {"threadfence_system", &I::genThreadFenceSystem, {}, /*isElemental=*/false}, {"time", &I::genTime, {}, /*isElemental=*/false}, + {"tma_bulk_commit_group", + &I::genTMABulkCommitGroup, + {{}}, + /*isElemental=*/false}, + {"tma_bulk_wait_group", + &I::genTMABulkWaitGroup, + {{}}, + /*isElemental=*/false}, {"trailz", &I::genTrailz}, {"transfer", &I::genTransfer, @@ -3176,6 +3200,61 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType, return fir::runtime::genAssociated(builder, loc, pointerBox, targetBox); } +static mlir::Value convertBarrierToLLVM(fir::FirOpBuilder &builder, + mlir::Location loc, + mlir::Value barrier) { + mlir::Value llvmPtr = fir::ConvertOp::create( + builder, loc, mlir::LLVM::LLVMPointerType::get(builder.getContext()), + barrier); + mlir::Value addrCast = mlir::LLVM::AddrSpaceCastOp::create( + builder, loc, + mlir::LLVM::LLVMPointerType::get( + builder.getContext(), + static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared)), + llvmPtr); + return addrCast; +} + +// BARRIER_ARRIVE (CUDA) +mlir::Value +IntrinsicLibrary::genBarrierArrive(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> 
args) { + assert(args.size() == 1); + mlir::Value barrier = convertBarrierToLLVM(builder, loc, args[0]); + return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType, + barrier) + .getResult(); +} + +// BARRIER_ARRIBVE_CNT (CUDA) +mlir::Value +IntrinsicLibrary::genBarrierArriveCnt(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 2); + mlir::Value barrier = convertBarrierToLLVM(builder, loc, args[0]); + mlir::Value token = fir::AllocaOp::create(builder, loc, resultType); + // TODO: the MBarrierArriveExpectTxOp is not taking the state argument and + // currently just the sink symbol `_`. + // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive + mlir::NVVM::MBarrierArriveExpectTxOp::create(builder, loc, barrier, args[1], + {}); + return fir::LoadOp::create(builder, loc, token); +} + +// BARRIER_INIT (CUDA) +void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 2); + mlir::Value barrier = + convertBarrierToLLVM(builder, loc, fir::getBase(args[0])); + mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier, + fir::getBase(args[1]), {}); + auto kind = mlir::NVVM::ProxyKindAttr::get( + builder.getContext(), mlir::NVVM::ProxyKind::async_shared); + auto space = mlir::NVVM::SharedSpaceAttr::get( + builder.getContext(), mlir::NVVM::SharedSpace::shared_cta); + mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space); +} + // BESSEL_JN fir::ExtendedValue IntrinsicLibrary::genBesselJn(mlir::Type resultType, @@ -4292,6 +4371,17 @@ IntrinsicLibrary::genExtendsTypeOf(mlir::Type resultType, fir::getBase(args[1]))); } +// FENCE_PROXY_ASYNC (CUDA) +void IntrinsicLibrary::genFenceProxyAsync( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 0); + auto kind = mlir::NVVM::ProxyKindAttr::get( + builder.getContext(), mlir::NVVM::ProxyKind::async_shared); + auto space = 
mlir::NVVM::SharedSpaceAttr::get( + builder.getContext(), mlir::NVVM::SharedSpace::shared_cta); + mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space); +} + // FINDLOC fir::ExtendedValue IntrinsicLibrary::genFindloc(mlir::Type resultType, @@ -9107,6 +9197,21 @@ mlir::Value IntrinsicLibrary::genTime(mlir::Type resultType, fir::runtime::genTime(builder, loc)); } +// TMA_BULK_COMMIT_GROUP (CUDA) +void IntrinsicLibrary::genTMABulkCommitGroup( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 0); + mlir::NVVM::CpAsyncBulkCommitGroupOp::create(builder, loc); +} + +// TMA_BULK_WAIT_GROUP (CUDA) +void IntrinsicLibrary::genTMABulkWaitGroup( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 0); + auto group = builder.getIntegerAttr(builder.getI32Type(), 0); + mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc, group, {}); +} + // TRIM fir::ExtendedValue IntrinsicLibrary::genTrim(mlir::Type resultType, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 1598c64..106f3e2 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1987,6 +1987,42 @@ implicit none end function end interface + ! 
TMA Operations + + interface + attributes(device) subroutine barrier_init(barrier, count) + integer(8), shared :: barrier + integer(4) :: count + end subroutine + end interface + + interface barrier_arrive + attributes(device) function barrier_arrive(barrier) result(token) + integer(8), shared :: barrier + integer(8) :: token + end function + attributes(device) function barrier_arrive_cnt(barrier, count) result(token) + integer(8), shared :: barrier + integer(4) :: count + integer(8) :: token + end function + end interface + + interface + attributes(device) subroutine fence_proxy_async() + end subroutine + end interface + + interface + attributes(device) subroutine tma_bulk_commit_group() + end subroutine + end interface + + interface + attributes(device) subroutine tma_bulk_wait_group() + end subroutine + end interface + contains attributes(device) subroutine syncthreads() diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 5e1f6b6..697b17b 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -392,28 +392,48 @@ end subroutine ! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1 ! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32 -! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldcg_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldcs_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldlu_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldcv_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldca_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! 
CHECK-DAG: func.func private @__ldcg_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldcs_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldlu_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldcv_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldca_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldcg_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldcs_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldlu_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldcv_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldca_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldcg_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldcs_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldlu_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldcv_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldca_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldcg_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldcs_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldlu_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! 
CHECK-DAG: func.func private @__ldcv_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) +attributes(global) subroutine test_barrier() + integer(8), shared :: barrier + integer(8) :: token + integer :: count + call barrier_init(barrier, 256) + + token = barrier_arrive(barrier) + + token = barrier_arrive(barrier, count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_barrier() + +! CHECK: %[[SHARED:.*]] = cuf.shared_memory i64 {bindc_name = "barrier", uniq_name = "_QFtest_barrierEbarrier"} -> !fir.ref<i64> +! CHECK: %[[DECL_SHARED:.*]]:2 = hlfir.declare %[[SHARED]] {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_barrierEbarrier"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[COUNT:.*]] = arith.constant 256 : i32 +! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> +! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32 +! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>} + +! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> +! CHECK: %{{.*}} = nvvm.mbarrier.arrive.shared %[[SHARED_PTR]] : !llvm.ptr<3> -> i64 + +! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> +! CHECK: nvvm.mbarrier.arrive.expect_tx %[[SHARED_PTR]], %{{.*}} : !llvm.ptr<3>, i32 + + +attributes(global) subroutine test_fence() + call fence_proxy_async() +end subroutine + +! CHECK-LABEL: func.func @_QPtest_fence() +! 
CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>} + +attributes(global) subroutine test_tma() + call tma_bulk_commit_group() + call tma_bulk_wait_group() +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma() +! CHECK: nvvm.cp.async.bulk.commit.group +! CHECK: nvvm.cp.async.bulk.wait_group 0 diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index 3da339c..813a2a4 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -43,6 +43,9 @@ constexpr static uint32_t MAX_TRIES = 1024; // The number of previously allocated slabs we will keep in memory. constexpr static uint32_t CACHED_SLABS = 8; +// Configuration for whether or not we will return unused slabs to memory. +constexpr static bool RECLAIM = true; + static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two"); namespace impl { @@ -399,7 +402,7 @@ private: // and obtain exclusive rights to deconstruct it. If the CAS failed either // another thread resurrected the counter and we quit, or a parallel read // helped us invalidating it. For the latter, claim that flag and return. - if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) { + if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n && RECLAIM) { uint32_t expected = 0; if (counter.compare_exchange_strong(expected, INVALID, cpp::MemoryOrder::RELAXED, @@ -417,8 +420,9 @@ private: // thread. uint64_t read() { auto val = counter.load(cpp::MemoryOrder::RELAXED); - if (val == 0 && counter.compare_exchange_strong( - val, INVALID | HELPED, cpp::MemoryOrder::RELAXED)) + if (val == 0 && RECLAIM && + counter.compare_exchange_strong(val, INVALID | HELPED, + cpp::MemoryOrder::RELAXED)) return 0; return (val & INVALID) ? 0 : val; } @@ -463,7 +467,7 @@ private: return nullptr; cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); - return ptr.load(cpp::MemoryOrder::RELAXED); + return RECLAIM ? 
ptr.load(cpp::MemoryOrder::RELAXED) : expected; } // Finalize the associated memory and signal that it is ready to use by diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 1bcd205..1077d80 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#if !defined(__wasm__) + #include "assembly.h" #define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 @@ -20,7 +22,7 @@ .text #endif -#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) +#if !defined(__USING_SJLJ_EXCEPTIONS__) #if defined(__i386__) DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto) @@ -1253,7 +1255,8 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind19Registers_loongarch6jumptoEv) #endif -#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) */ +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ NO_EXEC_STACK_DIRECTIVE +#endif /* !defined(__wasm__) */ diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index 5139a55..8bf99eb 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#if !defined(__wasm__) + #include "assembly.h" #define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 @@ -20,7 +22,7 @@ .text #endif -#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) +#if !defined(__USING_SJLJ_EXCEPTIONS__) #if defined(__i386__) @@ -1232,6 +1234,8 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) WEAK_ALIAS(__unw_getcontext, unw_getcontext) #endif -#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) */ +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ NO_EXEC_STACK_DIRECTIVE + +#endif /* !defined(__wasm__) */ diff --git a/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py 
b/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py index ec208f2..759b620 100644 --- a/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py +++ b/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py @@ -218,11 +218,9 @@ note: candidate function not viable: requires single argument 'x', but 2 argumen # Detail 1/3: note: requested expression language diag = details.GetItemAtIndex(0) self.assertEqual(str(diag.GetValueForKey("severity")), "note") - self.assertEqual( - str(diag.GetValueForKey("message")), "Ran expression as 'C++11'." - ) - self.assertEqual( - str(diag.GetValueForKey("rendered")), "Ran expression as 'C++11'." + self.assertIn("Ran expression as 'C++", str(diag.GetValueForKey("message"))) + self.assertIn( + "Ran expression as 'C++", str(diag.GetValueForKey("rendered")) ) self.assertEqual(str(diag.GetValueForKey("source_location")), "") self.assertEqual(str(diag.GetValueForKey("file")), "") diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py index ebba4d1..10cbd26 100644 --- a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py @@ -28,6 +28,9 @@ class TestCortexMExceptionUnwind(TestBase): core = self.getBuildArtifact("core") self.yaml2macho_core("armv7m-nofpu-exception.yaml", core, exe_uuid) + if self.TraceOn(): + self.runCmd("log enable lldb unwind") + process = target.LoadCore(core) self.assertTrue(process.IsValid()) diff --git a/lldb/test/Shell/Expr/TestExprLanguageNote.test b/lldb/test/Shell/Expr/TestExprLanguageNote.test index f3dc592..b4387bf 100644 --- a/lldb/test/Shell/Expr/TestExprLanguageNote.test +++ b/lldb/test/Shell/Expr/TestExprLanguageNote.test @@ -26,7 +26,7 @@ run expr blah # CHECK-TARGET: (lldb) expr -# CHECK-TARGET: note: Ran 
expression as 'C++14'. +# CHECK-TARGET: note: Ran expression as 'C++{{.*}}' expr -l objc -- blah diff --git a/llvm/docs/QualGroup.rst b/llvm/docs/QualGroup.rst index b45f569..5c05e4e 100644 --- a/llvm/docs/QualGroup.rst +++ b/llvm/docs/QualGroup.rst @@ -75,6 +75,16 @@ They meet the criteria for inclusion below. Knowing their handles help us keep t - capitan-davide - capitan_davide - capitan-davide + * - Jorge Pinto Sousa + - Critical Techworks + - sousajo-cc + - sousajo-cc + - sousajo-cc + * - José Rui Simões + - Critical Software + - jr-simoes + - jr_simoes + - iznogoud-zz * - Oscar Slotosch - Validas - slotosch @@ -100,6 +110,11 @@ They meet the criteria for inclusion below. Knowing their handles help us keep t - YoungJunLee - YoungJunLee - IamYJLee + * - Zaky Hermawan + - No Affiliation + - ZakyHermawan + - quarkz99 + - zakyHermawan Organizations are limited to three representatives within the group to maintain diversity. diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index 017585a4..310539f 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -21,7 +21,9 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Signals.h" #include <cassert> #include <cstdint> #include <functional> diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 88691b9..73f2c55 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -847,8 +847,7 @@ public: /// This is usually true on most targets. But some targets, like Thumb1, /// have immediate shift instructions, but no immediate "and" instruction; /// this makes the fold unprofitable. 
- virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const { + virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N) const { return true; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h index ed6ea96..2dd5abe 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Compiler.h" #include <atomic> @@ -71,6 +72,7 @@ private: /// from nullptr to enable comparison with these values. class SymbolStringPtrBase { friend class SymbolStringPool; + friend class SymbolStringPoolEntryUnsafe; friend struct DenseMapInfo<SymbolStringPtr>; friend struct DenseMapInfo<NonOwningSymbolStringPtr>; @@ -204,7 +206,7 @@ public: SymbolStringPoolEntryUnsafe(PoolEntry *E) : E(E) {} /// Create an unsafe pool entry ref without changing the ref-count. - static SymbolStringPoolEntryUnsafe from(const SymbolStringPtr &S) { + static SymbolStringPoolEntryUnsafe from(const SymbolStringPtrBase &S) { return S.S; } @@ -318,6 +320,10 @@ SymbolStringPool::getRefCount(const SymbolStringPtrBase &S) const { LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtrBase &Sym); +inline hash_code hash_value(const orc::SymbolStringPtrBase &S) { + return hash_value(orc::SymbolStringPoolEntryUnsafe::from(S).rawPtr()); +} + } // end namespace orc template <> diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index face5da..d47f6c0 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -216,6 +216,12 @@ public: /// Get the range without infinities. It is useful when we apply ninf flag to /// range of operands/results. 
LLVM_ABI ConstantFPRange getWithoutInf() const; + + /// Return a new range in the specified format with the specified rounding + /// mode. + LLVM_ABI ConstantFPRange + cast(const fltSemantics &DstSem, + APFloat::roundingMode RM = APFloat::rmNearestTiesToEven) const; }; inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) { diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index f87344e..70916d8 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -739,7 +739,7 @@ class LLVM_ABI raw_null_ostream : public raw_pwrite_stream { uint64_t current_pos() const override; public: - explicit raw_null_ostream() = default; + explicit raw_null_ostream() : raw_pwrite_stream(/*Unbuffered=*/true) {} ~raw_null_ostream() override; }; diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index ed2e01c..dc8cd86d 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -277,6 +277,7 @@ public: MuslF32, MuslSF, MuslX32, + MuslWALI, LLVM, MSVC, @@ -798,6 +799,12 @@ public: return getObjectFormat() == Triple::DXContainer; } + /// Tests whether the target uses WALI Wasm + bool isWALI() const { + return getArch() == Triple::wasm32 && isOSLinux() && + getEnvironment() == Triple::MuslWALI; + } + /// Tests whether the target is the PS4 platform. 
bool isPS4() const { return getArch() == Triple::x86_64 && @@ -840,6 +847,7 @@ public: getEnvironment() == Triple::MuslF32 || getEnvironment() == Triple::MuslSF || getEnvironment() == Triple::MuslX32 || + getEnvironment() == Triple::MuslWALI || getEnvironment() == Triple::OpenHOS || isOSLiteOS(); } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 0ca55a26..54e916e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -118,6 +118,10 @@ static cl::opt<bool> #endif cl::desc("")); +static cl::opt<bool> PreserveBitcodeUseListOrder( + "preserve-bc-uselistorder", cl::Hidden, cl::init(true), + cl::desc("Preserve use-list order when writing LLVM bitcode.")); + namespace llvm { extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold; } @@ -217,7 +221,10 @@ public: bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index) : BitcodeWriterBase(Stream, StrtabBuilder), M(M), - VE(M, ShouldPreserveUseListOrder), Index(Index) { + VE(M, PreserveBitcodeUseListOrder.getNumOccurrences() + ? PreserveBitcodeUseListOrder + : ShouldPreserveUseListOrder), + Index(Index) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index aa078f3..e40fb76 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -704,9 +704,17 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty, addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty))); } +// FIXME: change callsites to use the new DW_LNAME_ language codes. 
llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const { - return static_cast<llvm::dwarf::SourceLanguage>( - getLanguage().getUnversionedName()); + const auto &Lang = getLanguage(); + + if (!Lang.hasVersionedName()) + return static_cast<llvm::dwarf::SourceLanguage>(Lang.getName()); + + return llvm::dwarf::toDW_LANG( + static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()), + Lang.getVersion()) + .value_or(llvm::dwarf::DW_LANG_hi_user); } std::string DwarfUnit::getParentContextString(const DIScope *Context) const { diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index d5153b7..cdcb29d9 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1209,7 +1209,7 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB, MIE = MBB.instr_begin(); MII != MIE; --MII) { const MachineInstr &MI = *std::prev(MII); - if (MI.isDebugInstr() || MI.isPseudoProbe()) + if (MI.isDebugOrPseudoInstr()) continue; RegisterOperands RegOpers; RegOpers.collect(MI, *TRI, *MRI, false, false); diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index 5f37890..7d4674b 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -858,7 +858,7 @@ void RegPressureTracker::recedeSkipDebugValues() { void RegPressureTracker::recede(SmallVectorImpl<VRegMaskOrUnit> *LiveUses) { recedeSkipDebugValues(); - if (CurrPos->isDebugInstr() || CurrPos->isPseudoProbe()) { + if (CurrPos->isDebugOrPseudoInstr()) { // It's possible to only have debug_value and pseudo probe instructions and // hit the start of the block. 
assert(CurrPos == MBB->begin()); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b47274b..b23b190 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10628,7 +10628,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // folding this will increase the total number of instructions. if (N0.getOpcode() == ISD::SRL && (N0.getOperand(1) == N1 || N0.hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N)) { if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { @@ -11207,7 +11207,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or // (and (srl x, (sub c2, c1), MASK) if ((N0.getOperand(1) == N1 || N0->hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N)) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b5f8a61..437d0f4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3313,7 +3313,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; // Unary FP Operations - case ISD::FABS: case ISD::FACOS: case ISD::FASIN: case ISD::FATAN: @@ -3329,7 +3328,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FLOG10: case ISD::FNEARBYINT: - case ISD::FNEG: case ISD::FREEZE: case ISD::FRINT: case ISD::FROUND: @@ -3341,6 +3339,12 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { 
case ISD::FTAN: case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; + case ISD::FABS: + R = SoftPromoteHalfRes_FABS(N); + break; + case ISD::FNEG: + R = SoftPromoteHalfRes_FNEG(N); + break; case ISD::AssertNoFPClass: R = SoftPromoteHalfRes_AssertNoFPClass(N); break; @@ -3670,6 +3674,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FABS(SDNode *N) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Clear the sign bit. + return DAG.getNode(ISD::AND, dl, MVT::i16, Op, + DAG.getConstant(0x7fff, dl, MVT::i16)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FNEG(SDNode *N) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Invert the sign bit. + return DAG.getNode(ISD::XOR, dl, MVT::i16, Op, + DAG.getConstant(0x8000, dl, MVT::i16)); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_AssertNoFPClass(SDNode *N) { return GetSoftPromotedHalf(N->getOperand(0)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d580ce0..603dc34 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -832,6 +832,8 @@ private: SDValue SoftPromoteHalfRes_SELECT(SDNode *N); SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); + SDValue SoftPromoteHalfRes_FABS(SDNode *N); + SDValue SoftPromoteHalfRes_FNEG(SDNode *N); SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N); SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 0bc877d..2430d98 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -102,6 +102,10 @@ static cl::opt<bool> PrintProfData( "print-prof-data", cl::Hidden, 
cl::desc("Pretty print perf data (branch weights, etc) when dumping")); +static cl::opt<bool> PreserveAssemblyUseListOrder( + "preserve-ll-uselistorder", cl::Hidden, cl::init(false), + cl::desc("Preserve use-list order when writing LLVM assembly.")); + // Make virtual table appear in this compilation unit. AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default; @@ -2939,7 +2943,10 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, bool IsForDebug, bool ShouldPreserveUseListOrder) : Out(o), TheModule(M), Machine(Mac), TypePrinter(M), AnnotationWriter(AAW), IsForDebug(IsForDebug), - ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { + ShouldPreserveUseListOrder( + PreserveAssemblyUseListOrder.getNumOccurrences() + ? PreserveAssemblyUseListOrder + : ShouldPreserveUseListOrder) { if (!TheModule) return; for (const GlobalObject &GO : TheModule->global_objects()) @@ -2950,7 +2957,8 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const ModuleSummaryIndex *Index, bool IsForDebug) : Out(o), TheIndex(Index), Machine(Mac), TypePrinter(/*Module=*/nullptr), - IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false) {} + IsForDebug(IsForDebug), + ShouldPreserveUseListOrder(PreserveAssemblyUseListOrder) {} void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (!Operand) { diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 2477e22..070e833 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -326,6 +326,8 @@ std::optional<bool> ConstantFPRange::getSignBit() const { } bool ConstantFPRange::operator==(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); if (MayBeSNaN != CR.MayBeSNaN || MayBeQNaN != CR.MayBeQNaN) return false; return Lower.bitwiseIsEqual(CR.Lower) && 
Upper.bitwiseIsEqual(CR.Upper); @@ -425,3 +427,20 @@ ConstantFPRange ConstantFPRange::getWithoutInf() const { return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN, MayBeSNaN); } + +ConstantFPRange ConstantFPRange::cast(const fltSemantics &DstSem, + APFloat::roundingMode RM) const { + bool LosesInfo; + APFloat NewLower = Lower; + APFloat NewUpper = Upper; + // To be conservative, return the full range if converting either bound is invalid or yields NaN. + if (NewLower.convert(DstSem, RM, &LosesInfo) == APFloat::opInvalidOp || + NewLower.isNaN()) + return getFull(DstSem); + if (NewUpper.convert(DstSem, RM, &LosesInfo) == APFloat::opInvalidOp || + NewUpper.isNaN()) + return getFull(DstSem); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), + /*MayBeQNaNVal=*/MayBeQNaN || MayBeSNaN, + /*MayBeSNaNVal=*/false); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7294f3e..fbce3b0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18640,7 +18640,7 @@ bool AArch64TargetLowering::isDesirableToCommuteXorWithShift( } bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e472e7d..00956fd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -300,8 +300,7 @@ public: bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; /// Return true if it is profitable to fold a pair of shifts into a mask. 
- bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; /// Return true if it is profitable to fold a pair of shifts into a mask. bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 83c7def..67ea2dd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13816,7 +13816,7 @@ bool ARMTargetLowering::isDesirableToCommuteXorWithShift( } bool ARMTargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && @@ -13826,7 +13826,8 @@ bool ARMTargetLowering::shouldFoldConstantShiftPairToMask( if (!Subtarget->isThumb1Only()) return true; - if (Level == BeforeLegalizeTypes) + EVT VT = N->getValueType(0); + if (VT.getScalarSizeInBits() > 32) return true; return false; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 26ff54c..70aa001 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -772,8 +772,7 @@ class VectorType; bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; /// Return true if it is profitable to fold a pair of shifts into a mask. 
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index b05de49..7f1ff45 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1306,7 +1306,7 @@ bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const { } bool MipsTargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index c65c76c..25a0bf9 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -290,8 +290,7 @@ class TargetRegisterClass; bool isCheapToSpeculateCttz(Type *Ty) const override; bool isCheapToSpeculateCtlz(Type *Ty) const override; bool hasBitTest(SDValue X, SDValue Y) const override; - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; /// Return the register type for a given MVT, ensuring vectors are treated /// as a series of gpr sized integers. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f692180..944a1e2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -585,6 +585,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + // Custom handling for PowerPC ucmp instruction + setOperationAction(ISD::UCMP, MVT::i32, Custom); + setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? 
Custom : Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no @@ -12618,6 +12622,33 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({Sub, OverflowTrunc}, dl); } +// Lower unsigned 3-way compare producing -1/0/1. +SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue A = DAG.getFreeze(Op.getOperand(0)); + SDValue B = DAG.getFreeze(Op.getOperand(1)); + EVT OpVT = A.getValueType(); // operand type + EVT ResVT = Op.getValueType(); // result type + + // First compute diff = A - B (will become subf). + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B); + + // Generate B - A using SUBC to capture carry. + SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); + SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A); + SDValue CA0 = SubC.getValue(1); + + // t2 = A - B + CA0 using SUBE. + SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0); + SDValue CA1 = SubE1.getValue(1); + + // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1). + SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1); + + // Extract the first result and sign-extend or truncate it to the result type. + return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT); +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12722,6 +12753,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UADDO_CARRY: case ISD::USUBO_CARRY: return LowerADDSUBO_CARRY(Op, DAG); + case ISD::UCMP: + return LowerUCMP(Op, DAG); } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 6694305..59f3387 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1318,6 +1318,7 @@ namespace llvm { SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToLibCall(const char *LibCallName, SDValue Op, SelectionDAG &DAG) const; SDValue lowerLibCallBasedOnType(const char *LibCallFloatName, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 7a14929..66717b9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1367,9 +1367,8 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, (${rs1})", class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> : Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>; -class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT, - ValueType vt2 = XLenVT> - : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; +class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> + : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType, ValueType vt = XLenVT> @@ -1653,17 +1652,18 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, node:$falsev), [{}], IntCCtoRISCVCC>; -multiclass 
SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> { +multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt, + ValueType cmpvt = XLenVT> { let usesCustomInserter = 1 in def _Using_CC_GPR : Pseudo<(outs valty:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, valty:$truev, valty:$falsev), [(set valty:$dst, - (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond, + (riscv_selectcc_frag:$cc (cmpvt GPR:$lhs), GPR:$rhs, cond, (vt valty:$truev), valty:$falsev))]>; // Explicitly select 0 in the condition to X0. The register coalescer doesn't // always do it. - def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond, (vt valty:$truev), + def : Pat<(riscv_selectcc_frag:$cc (cmpvt GPR:$lhs), 0, cond, (vt valty:$truev), valty:$falsev), (!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, (XLenVT X0), (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>; @@ -1972,8 +1972,9 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs /// Loads -class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> - : Pat<(vt (LoadOp (AddrRegImm (XLenVT GPRMem:$rs1), simm12_lo:$imm12))), +class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT, + ValueType PtrVT = XLenVT> + : Pat<(vt (LoadOp (AddrRegImm (PtrVT GPRMem:$rs1), simm12_lo:$imm12))), (Inst GPRMem:$rs1, simm12_lo:$imm12)>; def : LdPat<sextloadi8, LB>; @@ -1987,8 +1988,8 @@ def : LdPat<zextloadi16, LHU>; /// Stores class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, - ValueType vt> - : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (XLenVT GPRMem:$rs1), + ValueType vt, ValueType PtrVT = XLenVT> + : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (PtrVT GPRMem:$rs1), simm12_lo:$imm12)), (Inst StTy:$rs2, GPRMem:$rs1, simm12_lo:$imm12)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index b9510ef..afac37d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -59,9 +59,9 @@ def FPR64IN32X : 
RegisterOperand<GPRPair> { def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; def ZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZdinx, IsRV64], - f64, FPR64INX, FPR32INX, FPR64INX, ?>; + f64, FPR64INX, FPR32INX, FPR64INX, ?, i64>; def Zdinx32Ext : ExtInfo<"_IN32X", "ZdinxRV32Only", [HasStdExtZdinx, IsRV32], - f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?>; + f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?, i32>; defvar DExts = [DExt, ZdinxExt, Zdinx32Ext]; defvar DExtsRV64 = [DExt, ZdinxExt]; @@ -261,8 +261,10 @@ let Predicates = [HasStdExtZdinx, IsRV32] in { /// Float conversion operations // f64 -> f32, f32 -> f64 -def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1, FRM_RNE)>; +def : Pat<(any_fpround FPR64IN32X:$rs1), + (FCVT_S_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>; +def : Pat<(any_fpextend FPR32INX:$rs1), + (FCVT_D_S_IN32X FPR32INX:$rs1, (i32 FRM_RNE))>; } // Predicates = [HasStdExtZdinx, IsRV32] // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so @@ -321,7 +323,7 @@ def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>; def : Pat<(fneg FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>; def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>; -def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>; +def : Pat<(i64 (riscv_fclass FPR64INX:$rs1)), (FCLASS_D_INX $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>; def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_INX, FPR64INX, f64>; @@ -354,41 +356,46 @@ def : Pat<(fneg (any_fma_nsz FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3)), } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { -def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; +def : Pat<(any_fsqrt FPR64IN32X:$rs1), + (FSQRT_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>; def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>; def 
: Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>; -def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>; +def : Pat<(i32 (riscv_fclass FPR64IN32X:$rs1)), (FCLASS_D_IN32X $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>; def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_IN32X, FPR64IN32X, f64>; def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)), (FSGNJN_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2)>; def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2), - (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, FRM_RNE))>; + (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, (i32 FRM_RNE)))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2), - (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>; + (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, (i32 FRM_DYN)))>; // fmadd: rs1 * rs2 + rs3 def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3), - (FMADD_D_IN32X $rs1, $rs2, $rs3, FRM_DYN)>; + (FMADD_D_IN32X $rs1, $rs2, $rs3, (i32 FRM_DYN))>; // fmsub: rs1 * rs2 - rs3 def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)), - (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; // fnmsub: -rs1 * rs2 + rs3 def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, FPR64IN32X:$rs3), - (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; // fnmadd: -rs1 * rs2 - rs3 def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)), - (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; // fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA) def : Pat<(fneg (any_fma_nsz FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3)), - (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, 
FPR64IN32X:$rs3, FRM_DYN)>; + (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; } // Predicates = [HasStdExtZdinx, IsRV32] // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches @@ -441,42 +448,42 @@ def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D, f64>; let Predicates = [HasStdExtZdinx, IsRV64] in { // Match signaling FEQ_D -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs2, SETEQ)), (AND (XLenVT (FLE_D_INX $rs1, $rs2)), (XLenVT (FLE_D_INX $rs2, $rs1)))>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETOEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs2, SETOEQ)), (AND (XLenVT (FLE_D_INX $rs1, $rs2)), (XLenVT (FLE_D_INX $rs2, $rs1)))>; // If both operands are the same, use a single FLE. -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs1, SETEQ)), (FLE_D_INX $rs1, $rs1)>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETOEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs1, SETOEQ)), (FLE_D_INX $rs1, $rs1)>; -def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64>; -def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64>; -def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64>; -def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64, i64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64, i64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64, i64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64, i64>; } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { // Match signaling FEQ_D -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs2, SETEQ)), +def : Pat<(i32 
(strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETEQ)), (AND (XLenVT (FLE_D_IN32X $rs1, $rs2)), (XLenVT (FLE_D_IN32X $rs2, $rs1)))>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs2, SETOEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETOEQ)), (AND (XLenVT (FLE_D_IN32X $rs1, $rs2)), (XLenVT (FLE_D_IN32X $rs2, $rs1)))>; // If both operands are the same, use a single FLE. -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs1, SETEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETEQ)), (FLE_D_IN32X $rs1, $rs1)>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs1, SETOEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETOEQ)), (FLE_D_IN32X $rs1, $rs1)>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64, i32>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64, i32>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64, i32>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64, i32>; } // Predicates = [HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtD] in { @@ -511,7 +518,7 @@ def SplitF64Pseudo } // Predicates = [HasStdExtD, NoStdExtZfa, IsRV32] let Predicates = [HasStdExtZdinx, IsRV64] in { -defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64>; +defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64, i64>; def PseudoFROUND_D_INX : PseudoFROUND<FPR64INX, f64>; @@ -523,9 +530,9 @@ def : StPat<store, SD, GPR, f64>; } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { -defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64>; +defm Select_FPR64IN32X : 
SelectCC_GPR_rrirr<FPR64IN32X, f64, i32>; -def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>; +def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64, i32>; /// Loads let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in @@ -537,8 +544,8 @@ def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12_l } // Predicates = [HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtZdinx, HasStdExtZilsd, IsRV32] in { -def : LdPat<load, LD_RV32, f64>; -def : StPat<store, SD_RV32, GPRPair, f64>; +def : LdPat<load, LD_RV32, f64, i32>; +def : StPat<store, SD_RV32, GPRPair, f64, i32>; } let Predicates = [HasStdExtD, IsRV32] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index fde030e..6571d99 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -131,7 +131,7 @@ def FPR32INX : RegisterOperand<GPRF32> { // The DAGOperand can be unset if the predicates are not enough to define it. 
class ExtInfo<string suffix, string space, list<Predicate> predicates, ValueType primaryvt, DAGOperand primaryty, DAGOperand f32ty, - DAGOperand f64ty, DAGOperand f16ty> { + DAGOperand f64ty, DAGOperand f16ty, ValueType intvt = XLenVT> { list<Predicate> Predicates = predicates; string Suffix = suffix; string Space = space; @@ -140,6 +140,7 @@ class ExtInfo<string suffix, string space, list<Predicate> predicates, DAGOperand F32Ty = f32ty; DAGOperand F64Ty = f64ty; ValueType PrimaryVT = primaryvt; + ValueType IntVT = intvt; } def FExt : ExtInfo<"", "", [HasStdExtF], f32, FPR32, FPR32, ?, ?>; @@ -314,9 +315,9 @@ multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr, def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.PrimaryTy, Commutable>; } -class PseudoFROUND<DAGOperand Ty, ValueType vt> +class PseudoFROUND<DAGOperand Ty, ValueType vt, ValueType intvt = XLenVT> : Pseudo<(outs Ty:$rd), (ins Ty:$rs1, Ty:$rs2, ixlenimm:$rm), - [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, timm:$rm)))]> { + [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, (intvt timm:$rm))))]> { let hasSideEffects = 0; let mayLoad = 0; let mayStore = 0; @@ -529,13 +530,14 @@ def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; /// Generic pattern classes class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond, - RVInstCommon Inst, ValueType vt> - : Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>; + RVInstCommon Inst, ValueType vt, ValueType intvt = XLenVT> + : Pat<(intvt (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>; multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond, RVInstCommon Inst, ExtInfo Ext> { let Predicates = Ext.Predicates in def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond, - !cast<RVInstCommon>(Inst#Ext.Suffix), Ext.PrimaryVT>; + !cast<RVInstCommon>(Inst#Ext.Suffix), + Ext.PrimaryVT, Ext.IntVT>; } class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst, @@ -549,14 +551,15 @@ multiclass 
PatFprFpr_m<SDPatternOperator OpNode, RVInstR Inst, } class PatFprFprDynFrm<SDPatternOperator OpNode, RVInstRFrm Inst, - DAGOperand RegTy, ValueType vt> - : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), (Inst $rs1, $rs2, FRM_DYN)>; + DAGOperand RegTy, ValueType vt, ValueType intvt> + : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), + (Inst $rs1, $rs2,(intvt FRM_DYN))>; multiclass PatFprFprDynFrm_m<SDPatternOperator OpNode, RVInstRFrm Inst, ExtInfo Ext> { let Predicates = Ext.Predicates in def Ext.Suffix : PatFprFprDynFrm<OpNode, !cast<RVInstRFrm>(Inst#Ext.Suffix), - Ext.PrimaryTy, Ext.PrimaryVT>; + Ext.PrimaryTy, Ext.PrimaryVT, Ext.IntVT>; } /// Float conversion operations diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index d8f5d3e..aa8f1a1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -669,19 +669,19 @@ let Predicates = [HasVendorXCValu, IsRV32] in { // Patterns for load & store operations //===----------------------------------------------------------------------===// class CVLdrrPat<PatFrag LoadOp, RVInst Inst> - : Pat<(XLenVT (LoadOp CVrr:$regreg)), + : Pat<(i32 (LoadOp CVrr:$regreg)), (Inst CVrr:$regreg)>; class CVStriPat<PatFrag StoreOp, RVInst Inst> - : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, simm12_lo:$imm12), + : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, simm12_lo:$imm12), (Inst GPR:$rs2, GPR:$rs1, simm12_lo:$imm12)>; class CVStrriPat<PatFrag StoreOp, RVInst Inst> - : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, GPR:$rs3), + : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, GPR:$rs3), (Inst GPR:$rs2, GPR:$rs1, GPR:$rs3)>; class CVStrrPat<PatFrag StoreOp, RVInst Inst> - : Pat<(StoreOp (XLenVT GPR:$rs2), CVrr:$regreg), + : Pat<(StoreOp (i32 GPR:$rs2), CVrr:$regreg), (Inst GPR:$rs2, CVrr:$regreg)>; let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in { @@ -725,17 +725,17 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32] in { (CV_INSERT 
GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>; - def : PatGpr<cttz, CV_FF1>; - def : PatGpr<ctlz, CV_FL1>; + def : PatGpr<cttz, CV_FF1, i32>; + def : PatGpr<ctlz, CV_FL1, i32>; def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>; - def : PatGpr<ctpop, CV_CNT>; + def : PatGpr<ctpop, CV_CNT, i32>; - def : PatGprGpr<rotr, CV_ROR>; + def : PatGprGpr<rotr, CV_ROR, i32>; def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts, cv_tuimm2:$radix), (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>; - def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>; + def : Pat<(bitreverse (i32 GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>; } class PatCoreVAluGpr<string intr, string asm> : @@ -760,18 +760,18 @@ multiclass PatCoreVAluGprGprImm<Intrinsic intr> { } let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in { - def : PatGpr<abs, CV_ABS>; - def : PatGprGpr<setle, CV_SLE>; - def : PatGprGpr<setule, CV_SLEU>; - def : PatGprGpr<smin, CV_MIN>; - def : PatGprGpr<umin, CV_MINU>; - def : PatGprGpr<smax, CV_MAX>; - def : PatGprGpr<umax, CV_MAXU>; - - def : Pat<(sext_inreg (XLenVT GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>; - def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>; - def : Pat<(and (XLenVT GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>; - def : Pat<(and (XLenVT GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>; + def : PatGpr<abs, CV_ABS, i32>; + def : PatGprGpr<setle, CV_SLE, i32>; + def : PatGprGpr<setule, CV_SLEU, i32>; + def : PatGprGpr<smin, CV_MIN, i32>; + def : PatGprGpr<umin, CV_MINU, i32>; + def : PatGprGpr<smax, CV_MAX, i32>; + def : PatGprGpr<umax, CV_MAXU, i32>; + + def : Pat<(sext_inreg (i32 GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>; + def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>; + def : Pat<(and (i32 GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>; + def : Pat<(and (i32 GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>; defm CLIP : PatCoreVAluGprImm<int_riscv_cv_alu_clip>; defm CLIPU : 
PatCoreVAluGprImm<int_riscv_cv_alu_clipu>; @@ -790,9 +790,9 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in { //===----------------------------------------------------------------------===// let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in { - def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETEQ, bb:$imm12), + def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETEQ, bb:$imm12), (CV_BEQIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>; - def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETNE, bb:$imm12), + def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETNE, bb:$imm12), (CV_BNEIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>; defm CC_SImm5_CV : SelectCC_GPR_riirr<GPR, simm5>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 5e1d07a..4537bfe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1648,10 +1648,10 @@ def : Pat<(qc_setwmi (i32 GPR:$rs3), GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb0 } // Predicates = [HasVendorXqcilsm, IsRV32] let Predicates = [HasVendorXqcili, IsRV32] in { -def: Pat<(qc_e_li tglobaladdr:$A), (QC_E_LI bare_simm32:$A)>; -def: Pat<(qc_e_li tblockaddress:$A), (QC_E_LI bare_simm32:$A)>; -def: Pat<(qc_e_li tjumptable:$A), (QC_E_LI bare_simm32:$A)>; -def: Pat<(qc_e_li tconstpool:$A), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tglobaladdr:$A)), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tblockaddress:$A)), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tjumptable:$A)), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tconstpool:$A)), (QC_E_LI bare_simm32:$A)>; } // Predicates = [HasVendorXqcili, IsRV32] //===----------------------------------------------------------------------===/i diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 014da99..c31713e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td 
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -69,16 +69,16 @@ def ZhinxminExt : ExtInfo<"_INX", "Zfinx", f16, FPR16INX, FPR32INX, ?, FPR16INX>; def ZhinxZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinx, HasStdExtZdinx, IsRV64], - ?, ?, FPR32INX, FPR64INX, FPR16INX>; + ?, ?, FPR32INX, FPR64INX, FPR16INX, i64>; def ZhinxminZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64], - ?, ?, FPR32INX, FPR64INX, FPR16INX>; + ?, ?, FPR32INX, FPR64INX, FPR16INX, i64>; def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinx, HasStdExtZdinx, IsRV32], - ?, ?, FPR32INX, FPR64IN32X, FPR16INX>; + ?, ?, FPR32INX, FPR64IN32X, FPR16INX, i32>; def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32], - ?, ?, FPR32INX, FPR64IN32X, FPR16INX>; + ?, ?, FPR32INX, FPR64IN32X, FPR16INX, i32>; defvar ZfhExts = [ZfhExt, ZhinxExt]; defvar ZfhminExts = [ZfhminExt, ZhinxminExt]; @@ -607,13 +607,16 @@ def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32] in { /// Float conversion operations // f64 -> f16, f16 -> f64 -def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>; +def : Pat<(any_fpround FPR64IN32X:$rs1), + (FCVT_H_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>; +def : Pat<(any_fpextend FPR16INX:$rs1), + (FCVT_D_H_IN32X FPR16INX:$rs1, (i32 FRM_RNE))>; /// Float arithmetic operations def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2), - (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>; -def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>; + (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, (i32 FRM_DYN)))>; +def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), + (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, (i32 FRM_RNE)))>; } // Predicates = 
[HasStdExtZhinxmin, HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64] in { diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index 46afe03..eab7b21 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -36,6 +36,7 @@ add_llvm_target(SPIRVCodeGen SPIRVMetadata.cpp SPIRVModuleAnalysis.cpp SPIRVStructurizer.cpp + SPIRVCombinerHelper.cpp SPIRVPreLegalizer.cpp SPIRVPreLegalizerCombiner.cpp SPIRVPostLegalizer.cpp diff --git a/llvm/lib/Target/SPIRV/SPIRVCombine.td b/llvm/lib/Target/SPIRV/SPIRVCombine.td index 6f726e0..fde56c4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCombine.td +++ b/llvm/lib/Target/SPIRV/SPIRVCombine.td @@ -11,8 +11,8 @@ include "llvm/Target/GlobalISel/Combine.td" def vector_length_sub_to_distance_lowering : GICombineRule < (defs root:$root), (match (wip_match_opcode G_INTRINSIC):$root, - [{ return matchLengthToDistance(*${root}, MRI); }]), - (apply [{ applySPIRVDistance(*${root}, MRI, B); }]) + [{ return Helper.matchLengthToDistance(*${root}); }]), + (apply [{ Helper.applySPIRVDistance(*${root}); }]) >; def SPIRVPreLegalizerCombiner diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp new file mode 100644 index 0000000..267794c --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp @@ -0,0 +1,60 @@ +//===-- SPIRVCombinerHelper.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SPIRVCombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +using namespace MIPatternMatch; + +SPIRVCombinerHelper::SPIRVCombinerHelper( + GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, + GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI, + const SPIRVSubtarget &STI) + : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI) {} + +/// This match is part of a combine that +/// rewrites length(X - Y) to distance(X, Y) +/// (f32 (g_intrinsic length +/// (g_fsub (vXf32 X) (vXf32 Y)))) +/// -> +/// (f32 (g_intrinsic distance +/// (vXf32 X) (vXf32 Y))) +/// +bool SPIRVCombinerHelper::matchLengthToDistance(MachineInstr &MI) const { + if (MI.getOpcode() != TargetOpcode::G_INTRINSIC || + cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::spv_length) + return false; + + // First operand of MI is `G_INTRINSIC` so start at operand 2. + Register SubReg = MI.getOperand(2).getReg(); + MachineInstr *SubInstr = MRI.getVRegDef(SubReg); + if (SubInstr->getOpcode() != TargetOpcode::G_FSUB) + return false; + + return true; +} + +void SPIRVCombinerHelper::applySPIRVDistance(MachineInstr &MI) const { + // Extract the operands for X and Y from the match criteria. 
+ Register SubDestReg = MI.getOperand(2).getReg(); + MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg); + Register SubOperand1 = SubInstr->getOperand(1).getReg(); + Register SubOperand2 = SubInstr->getOperand(2).getReg(); + Register ResultReg = MI.getOperand(0).getReg(); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildIntrinsic(Intrinsic::spv_distance, ResultReg) + .addUse(SubOperand1) + .addUse(SubOperand2); + + MI.eraseFromParent(); +} diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h new file mode 100644 index 0000000..0b39d34 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h @@ -0,0 +1,38 @@ +//===-- SPIRVCombinerHelper.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This contains common combine transformations that may be used in a combine +/// pass. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H + +#include "SPIRVSubtarget.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" + +namespace llvm { +class SPIRVCombinerHelper : public CombinerHelper { +protected: + const SPIRVSubtarget &STI; + +public: + using CombinerHelper::CombinerHelper; + SPIRVCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + bool IsPreLegalize, GISelValueTracking *VT, + MachineDominatorTree *MDT, const LegalizerInfo *LI, + const SPIRVSubtarget &STI); + + bool matchLengthToDistance(MachineInstr &MI) const; + void applySPIRVDistance(MachineInstr &MI) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 989950f..a466ab2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -316,6 +316,9 @@ private: bool selectImageWriteIntrinsic(MachineInstr &I) const; bool selectResourceGetPointer(Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectResourceNonUniformIndex(Register &ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const; bool selectModf(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; bool selectUpdateCounter(Register &ResVReg, const SPIRVType *ResType, @@ -347,7 +350,7 @@ private: SPIRV::StorageClass::StorageClass SC, uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg, - bool IsNonUniform, StringRef Name, + StringRef Name, MachineIRBuilder MIRBuilder) const; SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const; bool extractSubvector(Register &ResVReg, const SPIRVType *ResType, @@ -364,6 +367,7 @@ private: MachineInstr &I) const; bool 
loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType, GIntrinsic &HandleDef, MachineInstr &Pos) const; + void decorateUsesAsNonUniform(Register &NonUniformReg) const; }; bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) { @@ -3465,6 +3469,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_discard: { return selectDiscard(ResVReg, ResType, I); } + case Intrinsic::spv_resource_nonuniformindex: { + return selectResourceNonUniformIndex(ResVReg, ResType, I); + } default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); @@ -3504,7 +3511,6 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding( uint32_t Binding = getIConstVal(Intr.getOperand(3).getReg(), MRI); uint32_t ArraySize = getIConstVal(MainHandleDef->getOperand(4).getReg(), MRI); Register IndexReg = MainHandleDef->getOperand(5).getReg(); - const bool IsNonUniform = false; std::string CounterName = getStringValueFromReg(MainHandleDef->getOperand(6).getReg(), *MRI) + ".counter"; @@ -3513,7 +3519,7 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding( MachineIRBuilder MIRBuilder(I); Register CounterVarReg = buildPointerToResource( GR.getPointeeType(ResType), GR.getPointerStorageClass(ResType), Set, - Binding, ArraySize, IndexReg, IsNonUniform, CounterName, MIRBuilder); + Binding, ArraySize, IndexReg, CounterName, MIRBuilder); return BuildCOPY(ResVReg, CounterVarReg, I); } @@ -3713,6 +3719,55 @@ bool SPIRVInstructionSelector::selectResourceGetPointer( .constrainAllUses(TII, TRI, RBI); } +bool SPIRVInstructionSelector::selectResourceNonUniformIndex( + Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const { + Register ObjReg = I.getOperand(2).getReg(); + if (!BuildCOPY(ResVReg, ObjReg, I)) + return false; + + buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {}); + // Check for the registers that use the index marked as non-uniform + // and recursively mark them as non-uniform. 
+ // Per the spec, it's necessary that the final argument used for + // load/store/sample/atomic must be decorated, so we need to propagate the + // decoration through access chains and copies. + // https://docs.vulkan.org/samples/latest/samples/extensions/descriptor_indexing/README.html#_when_to_use_non_uniform_indexing_qualifier + decorateUsesAsNonUniform(ResVReg); + return true; +} + +void SPIRVInstructionSelector::decorateUsesAsNonUniform( + Register &NonUniformReg) const { + llvm::SmallVector<Register> WorkList = {NonUniformReg}; + while (WorkList.size() > 0) { + Register CurrentReg = WorkList.back(); + WorkList.pop_back(); + + bool IsDecorated = false; + for (MachineInstr &Use : MRI->use_instructions(CurrentReg)) { + if (Use.getOpcode() == SPIRV::OpDecorate && + Use.getOperand(1).getImm() == SPIRV::Decoration::NonUniformEXT) { + IsDecorated = true; + continue; + } + // Check if the instruction has the result register and add it to the + // worklist. + if (Use.getOperand(0).isReg() && Use.getOperand(0).isDef()) { + Register ResultReg = Use.getOperand(0).getReg(); + if (ResultReg == CurrentReg) + continue; + WorkList.push_back(ResultReg); + } + } + + if (!IsDecorated) { + buildOpDecorate(CurrentReg, *MRI->getVRegDef(CurrentReg), TII, + SPIRV::Decoration::NonUniformEXT, {}); + } + } + return; +} + bool SPIRVInstructionSelector::extractSubvector( Register &ResVReg, const SPIRVType *ResType, Register &ReadReg, MachineInstr &InsertionPoint) const { @@ -3784,7 +3839,7 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic( Register SPIRVInstructionSelector::buildPointerToResource( const SPIRVType *SpirvResType, SPIRV::StorageClass::StorageClass SC, uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg, - bool IsNonUniform, StringRef Name, MachineIRBuilder MIRBuilder) const { + StringRef Name, MachineIRBuilder MIRBuilder) const { const Type *ResType = GR.getTypeForSPIRVType(SpirvResType); if (ArraySize == 1) { SPIRVType *PtrType = @@ -3803,14 
+3858,7 @@ Register SPIRVInstructionSelector::buildPointerToResource( SPIRVType *ResPointerType = GR.getOrCreateSPIRVPointerType(ResType, MIRBuilder, SC); - Register AcReg = MRI->createVirtualRegister(GR.getRegClass(ResPointerType)); - if (IsNonUniform) { - // It is unclear which value needs to be marked an non-uniform, so both - // the index and the access changed are decorated as non-uniform. - buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); - buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); - } MIRBuilder.buildInstr(SPIRV::OpAccessChain) .addDef(AcReg) @@ -4560,9 +4608,6 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition( uint32_t Binding = foldImm(HandleDef.getOperand(3), MRI); uint32_t ArraySize = foldImm(HandleDef.getOperand(4), MRI); Register IndexReg = HandleDef.getOperand(5).getReg(); - // FIXME: The IsNonUniform flag needs to be set based on resource analysis. - // https://github.com/llvm/llvm-project/issues/155701 - bool IsNonUniform = false; std::string Name = getStringValueFromReg(HandleDef.getOperand(6).getReg(), *MRI); @@ -4576,13 +4621,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition( SC = GR.getPointerStorageClass(ResType); } - Register VarReg = - buildPointerToResource(VarType, SC, Set, Binding, ArraySize, IndexReg, - IsNonUniform, Name, MIRBuilder); - - if (IsNonUniform) - buildOpDecorate(HandleReg, HandleDef, TII, SPIRV::Decoration::NonUniformEXT, - {}); + Register VarReg = buildPointerToResource(VarType, SC, Set, Binding, ArraySize, + IndexReg, Name, MIRBuilder); // The handle for the buffer is the pointer to the resource. For an image, the // handle is the image object. So images get an extra load. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp index 8356751..48f4047 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp @@ -1,4 +1,3 @@ - //===-- SPIRVPreLegalizerCombiner.cpp - combine legalization ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -13,24 +12,17 @@ //===----------------------------------------------------------------------===// #include "SPIRV.h" -#include "SPIRVTargetMachine.h" +#include "SPIRVCombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" -#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" -#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IntrinsicsSPIRV.h" #define GET_GICOMBINER_DEPS #include "SPIRVGenPreLegalizeGICombiner.inc" @@ -47,72 +39,9 @@ namespace { #include "SPIRVGenPreLegalizeGICombiner.inc" #undef GET_GICOMBINER_TYPES -/// This match is part of a combine that -/// rewrites length(X - Y) to distance(X, Y) -/// (f32 (g_intrinsic length -/// (g_fsub (vXf32 X) (vXf32 Y)))) -/// -> -/// (f32 (g_intrinsic distance -/// (vXf32 X) (vXf32 Y))) -/// -bool matchLengthToDistance(MachineInstr &MI, MachineRegisterInfo &MRI) { - if (MI.getOpcode() != TargetOpcode::G_INTRINSIC 
|| - cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::spv_length) - return false; - - // First operand of MI is `G_INTRINSIC` so start at operand 2. - Register SubReg = MI.getOperand(2).getReg(); - MachineInstr *SubInstr = MRI.getVRegDef(SubReg); - if (!SubInstr || SubInstr->getOpcode() != TargetOpcode::G_FSUB) - return false; - - return true; -} -void applySPIRVDistance(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) { - - // Extract the operands for X and Y from the match criteria. - Register SubDestReg = MI.getOperand(2).getReg(); - MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg); - Register SubOperand1 = SubInstr->getOperand(1).getReg(); - Register SubOperand2 = SubInstr->getOperand(2).getReg(); - - // Remove the original `spv_length` instruction. - - Register ResultReg = MI.getOperand(0).getReg(); - DebugLoc DL = MI.getDebugLoc(); - MachineBasicBlock &MBB = *MI.getParent(); - MachineBasicBlock::iterator InsertPt = MI.getIterator(); - - // Build the `spv_distance` intrinsic. - MachineInstrBuilder NewInstr = - BuildMI(MBB, InsertPt, DL, B.getTII().get(TargetOpcode::G_INTRINSIC)); - NewInstr - .addDef(ResultReg) // Result register - .addIntrinsicID(Intrinsic::spv_distance) // Intrinsic ID - .addUse(SubOperand1) // Operand X - .addUse(SubOperand2); // Operand Y - - SPIRVGlobalRegistry *GR = - MI.getMF()->getSubtarget<SPIRVSubtarget>().getSPIRVGlobalRegistry(); - auto RemoveAllUses = [&](Register Reg) { - SmallVector<MachineInstr *, 4> UsesToErase( - llvm::make_pointer_range(MRI.use_instructions(Reg))); - - // calling eraseFromParent to early invalidates the iterator. 
- for (auto *MIToErase : UsesToErase) { - GR->invalidateMachineInstr(MIToErase); - MIToErase->eraseFromParent(); - } - }; - RemoveAllUses(SubDestReg); // remove all uses of FSUB Result - GR->invalidateMachineInstr(SubInstr); - SubInstr->eraseFromParent(); // remove FSUB instruction -} - class SPIRVPreLegalizerCombinerImpl : public Combiner { protected: - const CombinerHelper Helper; + const SPIRVCombinerHelper Helper; const SPIRVPreLegalizerCombinerImplRuleConfig &RuleConfig; const SPIRVSubtarget &STI; @@ -147,7 +76,7 @@ SPIRVPreLegalizerCombinerImpl::SPIRVPreLegalizerCombinerImpl( const SPIRVSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) : Combiner(MF, CInfo, TPC, &VT, CSEInfo), - Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI), + Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI), RuleConfig(RuleConfig), STI(STI), #define GET_GICOMBINER_CONSTRUCTOR_INITS #include "SPIRVGenPreLegalizeGICombiner.inc" diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1cfcb1f..eea84a2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3633,7 +3633,7 @@ bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const { } bool X86TargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && @@ -3648,7 +3648,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask( // the fold for non-splats yet. 
return N->getOperand(1) == N->getOperand(0).getOperand(1); } - return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level); + return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N); } bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index b55556a..e28b9c1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1244,8 +1244,7 @@ namespace llvm { getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override; - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index ac3626d..f021094 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -375,6 +375,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case MuslSF: return "muslsf"; case MuslX32: return "muslx32"; + case MuslWALI: + return "muslwali"; case Simulator: return "simulator"; case Pixel: return "pixel"; case Vertex: return "vertex"; @@ -767,6 +769,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("muslf32", Triple::MuslF32) .StartsWith("muslsf", Triple::MuslSF) .StartsWith("muslx32", Triple::MuslX32) + .StartsWith("muslwali", Triple::MuslWALI) .StartsWith("musl", Triple::Musl) .StartsWith("msvc", Triple::MSVC) .StartsWith("itanium", Triple::Itanium) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8c8fc69..6b67b48 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -544,8 
+544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp, Swapped ? OOp : C, "", &SI); - if (isa<FPMathOperator>(&SI)) - cast<Instruction>(NewSel)->setFastMathFlags(FMF); + if (isa<FPMathOperator>(&SI)) { + FastMathFlags NewSelFMF = FMF; + // We cannot propagate ninf from the original select, because OOp may be + // inf and the flag only guarantees that FalseVal (op OOp) is never + // infinity. + // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN + // Specifically, if the original select has both ninf and nnan, we can + // safely propagate the flag. + NewSelFMF.setNoInfs(TVI->hasNoInfs() || + (NewSelFMF.noInfs() && NewSelFMF.noNaNs())); + cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF); + } NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 45d3d49..b9d332b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2961,6 +2961,7 @@ public: isa<FixedVectorType>(NewAI.getAllocatedType()) ? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType() : Type::getInt8Ty(NewAI.getContext()); + unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy); // Helper to check if a type is // 1. A fixed vector type @@ -2991,10 +2992,17 @@ public: // Do not handle the case if // 1. The store does not meet the conditions in the helper function // 2. The store is volatile + // 3. 
The total store size is not a multiple of the allocated element + // type size if (!IsTypeValidForTreeStructuredMerge( SI->getValueOperand()->getType()) || SI->isVolatile()) return std::nullopt; + auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType()); + unsigned NumElts = VecTy->getNumElements(); + unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType()); + if (NumElts * EltSize % AllocatedEltTySize != 0) + return std::nullopt; StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(), SI->getValueOperand()); } else { diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 3ae570c..4f1ff7b 100644 --- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -void nameInstructions(Function &F) { - for (auto &Arg : F.args()) { +static void nameInstructions(Function &F) { + for (Argument &Arg : F.args()) { if (!Arg.hasName()) Arg.setName("arg"); } @@ -38,8 +37,6 @@ void nameInstructions(Function &F) { } } -} // namespace - PreservedAnalyses InstructionNamerPass::run(Function &F, FunctionAnalysisManager &FAM) { nameInstructions(F); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cfa8d27..2388375 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2245,6 +2245,26 @@ public: Align Alignment, const int64_t Diff, Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const; + /// Return true if an array of scalar loads can be replaced with a strided + /// load (with run-time stride). + /// \param PointerOps list of pointer arguments of loads. + /// \param ScalarTy type of loads. + /// \param CommonAlignment common alignement of loads as computed by + /// `computeCommonAlignment<LoadInst>`. 
+ /// \param SortedIndicies is a list of indicies computed by this function such + /// that the sequence `PointerOps[SortedIndices[0]], + /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is + /// ordered by the coefficient of the stride. For example, if PointerOps is + /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be + /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be + /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`. + /// \param SPtrInfo If the function return `true`, it also sets all the fields + /// of `SPtrInfo` necessary to generate the strided load later. + bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy, + Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const; + /// Checks if the given array of loads can be represented as a vectorized, /// scatter or just simple gather. /// \param VL list of loads. @@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, return false; } +bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, + Type *ScalarTy, Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const { + const unsigned Sz = PointerOps.size(); + FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz); + if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) || + !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)) + return false; + if (const SCEV *Stride = + calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) { + SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); + SPtrInfo.StrideSCEV = Stride; + return true; + } + return false; +} + BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order, SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo, @@ -6915,15 +6953,9 @@ 
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( auto *VecTy = getWidenedType(ScalarTy, Sz); Align CommonAlignment = computeCommonAlignment<LoadInst>(VL); if (!IsSorted) { - if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) { - if (const SCEV *Stride = - calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order); - Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) { - SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); - SPtrInfo.StrideSCEV = Stride; - return LoadsState::StridedVectorize; - } - } + if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order, + SPtrInfo)) + return LoadsState::StridedVectorize; if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) || TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) @@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis { void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) { BasicBlock *Parent = nullptr; // Checks if the instruction has supported opcode. - auto IsSupportedInstruction = [&](Instruction *I) { + auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) { + if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I))) + return false; return I && isSupportedOpcode(I->getOpcode()) && (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I)); }; @@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis { // will be unable to schedule anyway. 
SmallDenseSet<Value *, 8> Operands; SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates; + bool AnyUndef = false; for (Value *V : VL) { auto *I = dyn_cast<Instruction>(V); - if (!I) + if (!I) { + AnyUndef |= isa<UndefValue>(V); continue; + } if (!DT.isReachableFromEntry(I->getParent())) continue; if (Candidates.empty()) { @@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis { if (P.second.size() < BestOpcodeNum) continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { MainOp = I; BestOpcodeNum = P.second.size(); break; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 2555ebe..1fea068 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1777,6 +1777,9 @@ InstructionCost VPCostContext::getScalarizationOverhead( if (VF.isScalar()) return 0; + assert(!VF.isScalable() && + "Scalarization overhead not supported for scalable vectors"); + InstructionCost ScalarizationCost = 0; // Compute the cost of scalarizing the result if needed. if (!ResultTy->isVoidTy()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 600ff8a..8e916772 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3174,6 +3174,9 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // transform, avoid computing their cost multiple times for now. 
Ctx.SkipCostComputation.insert(UI); + if (VF.isScalable() && !isSingleScalar()) + return InstructionCost::getInvalid(); + switch (UI->getOpcode()) { case Instruction::GetElementPtr: // We mark this instruction as zero-cost because the cost of GEPs in @@ -3221,9 +3224,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, return ScalarCallCost; } - if (VF.isScalable()) - return InstructionCost::getInvalid(); - return ScalarCallCost * VF.getFixedValue() + Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF); } @@ -3274,9 +3274,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, } case Instruction::Load: case Instruction::Store: { - if (VF.isScalable() && !isSingleScalar()) - return InstructionCost::getInvalid(); - // TODO: See getMemInstScalarizationCost for how to handle replicating and // predicated cases. const VPRegionBlock *ParentRegion = getParent()->getParent(); diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 4b14dc6..7ee0015f 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -21204,18 +21204,14 @@ define bfloat @v_fabs_bf16(bfloat %a) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: v_fabs_bf16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fabs_bf16: @@ -21440,10 +21436,7 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GCN-NEXT: v_or_b32_e32 v0, 0x80000000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: v_fneg_fabs_bf16: @@ -21451,10 +21444,7 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_or_b32_e32 v0, 0x80000000, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fneg_fabs_bf16: @@ -21510,23 +21500,17 @@ define amdgpu_ps i32 @s_fneg_fabs_bf16(bfloat inreg %a) { ; GCN-LABEL: s_fneg_fabs_bf16: ; GCN: ; %bb.0: ; GCN-NEXT: v_mul_f32_e64 v0, 1.0, s0 +; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GCN-NEXT: v_or_b32_e32 v0, 0x8000, v0 ; GCN-NEXT: v_readfirstlane_b32 s0, v0 -; GCN-NEXT: s_and_b32 s0, s0, 0xffff0000 -; GCN-NEXT: s_bitset0_b32 s0, 31 -; GCN-NEXT: s_and_b32 s0, s0, 0xffff0000 -; GCN-NEXT: s_xor_b32 s0, s0, 0x80000000 -; GCN-NEXT: s_lshr_b32 s0, s0, 16 ; GCN-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_fneg_fabs_bf16: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s0 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, 0x8000, v0 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0 -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff0000 -; GFX7-NEXT: s_bitset0_b32 s0, 31 -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff0000 -; GFX7-NEXT: s_xor_b32 s0, s0, 0x80000000 -; GFX7-NEXT: s_lshr_b32 s0, s0, 16 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fneg_fabs_bf16: diff --git a/llvm/test/CodeGen/AMDGPU/fabs.bf16.ll 
b/llvm/test/CodeGen/AMDGPU/fabs.bf16.ll index 5d184b1..c46fcde 100644 --- a/llvm/test/CodeGen/AMDGPU/fabs.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/fabs.bf16.ll @@ -218,19 +218,11 @@ define amdgpu_kernel void @s_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s4, s3, 0xffff0000 -; CI-NEXT: s_lshl_b32 s3, s3, 16 -; CI-NEXT: s_and_b32 s5, s2, 0xffff0000 -; CI-NEXT: v_mul_f32_e64 v0, 1.0, |s4| -; CI-NEXT: v_mul_f32_e64 v1, 1.0, |s3| -; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s5| -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s2| -; CI-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; CI-NEXT: s_and_b32 s3, s3, 0x7fff7fff +; CI-NEXT: s_and_b32 s2, s2, 0x7fff7fff ; CI-NEXT: v_mov_b32_e32 v3, s1 +; CI-NEXT: v_mov_b32_e32 v0, s2 +; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm @@ -537,16 +529,15 @@ define amdgpu_kernel void @v_fabs_fold_self_v2bf16(ptr addrspace(1) %out, ptr ad ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v2 -; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_mul_f32_e64 v4, 1.0, |v3| -; CI-NEXT: v_mul_f32_e64 v5, 1.0, |v2| -; CI-NEXT: v_and_b32_e32 v4, 0xffff0000, v4 -; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v5 -; CI-NEXT: v_mul_f32_e32 v3, v4, v3 -; CI-NEXT: v_mul_f32_e32 v2, v5, v2 -; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16 +; CI-NEXT: v_and_b32_e32 v3, 0x7fff, v2 +; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; CI-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_mul_f32_e32 v2, v2, v5 +; 
CI-NEXT: v_mul_f32_e32 v3, v3, v4 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_alignbit_b32 v2, v2, v3, 16 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -898,16 +889,13 @@ define amdgpu_kernel void @v_extract_fabs_fold_v2bf16(ptr addrspace(1) %in) #0 { ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dword v0, v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; CI-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; CI-NEXT: v_mul_f32_e64 v0, 1.0, |v0| -; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; CI-NEXT: v_mul_f32_e32 v1, 4.0, v1 +; CI-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 +; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_add_f32_e32 v0, 2.0, v0 -; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; CI-NEXT: v_mul_f32_e32 v1, 4.0, v1 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: flat_store_short v[0:1], v1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: flat_store_short v[0:1], v0 diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll index 64a9727..76da0aa 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll @@ -107,12 +107,10 @@ define amdgpu_kernel void @fneg_fabs_fmul_bf16(ptr addrspace(1) %out, bfloat %x, ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s3, s2, 0x7fff -; CI-NEXT: s_lshl_b32 s3, s3, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s3 +; CI-NEXT: s_lshl_b32 s3, s2, 16 ; CI-NEXT: s_and_b32 s2, s2, 0xffff0000 -; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; CI-NEXT: v_mul_f32_e32 v0, s2, v0 +; CI-NEXT: v_mov_b32_e32 v0, s3 +; CI-NEXT: v_mul_f32_e64 v0, s2, -|v0| ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: 
v_mov_b32_e32 v1, s1 @@ -204,12 +202,10 @@ define amdgpu_kernel void @fneg_fabs_free_bf16(ptr addrspace(1) %out, i16 %in) { ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s2, s2, 0x7fff -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s2 -; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; CI-NEXT: s_bitset1_b32 s2, 15 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_short v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -279,12 +275,10 @@ define amdgpu_kernel void @fneg_fabs_bf16(ptr addrspace(1) %out, bfloat %in) { ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s2, s2, 0x7fff -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s2 -; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; CI-NEXT: s_bitset1_b32 s2, 15 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_short v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -345,43 +339,22 @@ define amdgpu_kernel void @fneg_fabs_bf16(ptr addrspace(1) %out, bfloat %in) { } define amdgpu_kernel void @v_fneg_fabs_bf16(ptr addrspace(1) %out, ptr addrspace(1) %in) { -; CI-LABEL: v_fneg_fabs_bf16: -; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; CI-NEXT: s_add_i32 s12, s12, s17 -; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 -; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_mov_b32_e32 v0, s2 -; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: flat_load_ushort v2, v[0:1] -; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_mul_f32_e64 v2, 1.0, |v2| -; CI-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; CI-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 -; CI-NEXT: v_lshrrev_b32_e32 
v2, 16, v2 -; CI-NEXT: flat_store_short v[0:1], v2 -; CI-NEXT: s_endpgm -; -; VI-LABEL: v_fneg_fabs_bf16: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; VI-NEXT: s_add_i32 s12, s12, s17 -; VI-NEXT: s_mov_b32 flat_scratch_lo, s13 -; VI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_load_ushort v2, v[0:1] -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_e32 v2, 0x8000, v2 -; VI-NEXT: flat_store_short v[0:1], v2 -; VI-NEXT: s_endpgm +; CIVI-LABEL: v_fneg_fabs_bf16: +; CIVI: ; %bb.0: +; CIVI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; CIVI-NEXT: s_add_i32 s12, s12, s17 +; CIVI-NEXT: s_mov_b32 flat_scratch_lo, s13 +; CIVI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: v_mov_b32_e32 v0, s2 +; CIVI-NEXT: v_mov_b32_e32 v1, s3 +; CIVI-NEXT: flat_load_ushort v2, v[0:1] +; CIVI-NEXT: v_mov_b32_e32 v0, s0 +; CIVI-NEXT: v_mov_b32_e32 v1, s1 +; CIVI-NEXT: s_waitcnt vmcnt(0) +; CIVI-NEXT: v_or_b32_e32 v2, 0x8000, v2 +; CIVI-NEXT: flat_store_short v[0:1], v2 +; CIVI-NEXT: s_endpgm ; ; GFX9-LABEL: v_fneg_fabs_bf16: ; GFX9: ; %bb.0: @@ -431,21 +404,13 @@ define amdgpu_kernel void @s_fneg_fabs_v2bf16_non_bc_src(ptr addrspace(1) %out, ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s3, s2, 0xffff0000 -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_add_f32_e64 v0, s3, 2.0 -; CI-NEXT: v_add_f32_e64 v1, s2, 1.0 -; CI-NEXT: v_readfirstlane_b32 s2, v0 +; CI-NEXT: s_lshl_b32 s3, s2, 16 ; CI-NEXT: s_and_b32 s2, s2, 0xffff0000 -; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; CI-NEXT: s_bitset0_b32 s2, 31 -; CI-NEXT: v_and_b32_e32 v0, 0x7fffffff, v1 -; CI-NEXT: s_and_b32 s2, s2, 0xffff0000 -; CI-NEXT: s_xor_b32 s2, s2, 0x80000000 -; CI-NEXT: v_and_b32_e32 v0, 
0xffff0000, v0 -; CI-NEXT: s_lshr_b32 s2, s2, 16 -; CI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 -; CI-NEXT: v_alignbit_b32 v2, s2, v0, 16 +; CI-NEXT: v_add_f32_e64 v1, s2, 2.0 +; CI-NEXT: v_add_f32_e64 v0, s3, 1.0 +; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; CI-NEXT: v_or_b32_e32 v2, 0x80008000, v0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: flat_store_dword v[0:1], v2 @@ -566,15 +531,10 @@ define amdgpu_kernel void @s_fneg_fabs_v2bf16_bc_src(ptr addrspace(1) %out, <2 x ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s3, s2, 0x7fff -; CI-NEXT: s_and_b32 s2, s2, 0x7fff0000 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s2 -; CI-NEXT: s_lshl_b32 s2, s3, 16 -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_mul_f32_e64 v1, -1.0, s2 -; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; CI-NEXT: s_or_b32 s2, s2, 0x80008000 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -629,27 +589,11 @@ define amdgpu_kernel void @fneg_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat> ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshl_b32 s4, s2, 16 -; CI-NEXT: s_and_b32 s2, s2, 0xffff0000 -; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s2| -; CI-NEXT: s_and_b32 s2, s3, 0xffff0000 -; CI-NEXT: s_lshl_b32 s5, s3, 16 -; CI-NEXT: v_mul_f32_e64 v3, 1.0, |s2| -; CI-NEXT: v_mul_f32_e64 v0, 1.0, |s4| -; CI-NEXT: v_mul_f32_e64 v1, 1.0, |s5| -; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v3 -; CI-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; CI-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 -; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; CI-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 -; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; CI-NEXT: 
v_xor_b32_e32 v1, 0x80000000, v1 -; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 -; CI-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; CI-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; CI-NEXT: s_or_b32 s3, s3, 0x80008000 +; CI-NEXT: s_or_b32 s2, s2, 0x80008000 ; CI-NEXT: v_mov_b32_e32 v3, s1 +; CI-NEXT: v_mov_b32_e32 v0, s2 +; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm @@ -860,21 +804,20 @@ define amdgpu_kernel void @s_fneg_multi_use_fabs_v2bf16(ptr addrspace(1) %out0, ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: v_mov_b32_e32 v2, s2 -; CI-NEXT: s_and_b32 s1, s4, 0x7fff -; CI-NEXT: s_and_b32 s2, s4, 0x7fff0000 -; CI-NEXT: v_mul_f32_e64 v4, -1.0, s2 -; CI-NEXT: s_lshl_b32 s1, s1, 16 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: s_and_b32 s0, s4, 0x7fff7fff -; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; CI-NEXT: v_mul_f32_e64 v5, -1.0, s1 -; CI-NEXT: v_alignbit_b32 v4, v4, v5, 16 -; CI-NEXT: v_mov_b32_e32 v5, s0 +; CI-NEXT: v_mov_b32_e32 v2, s2 +; CI-NEXT: s_or_b32 s2, s0, 0x8000 +; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: s_and_b32 s1, s4, 0x7fff0000 +; CI-NEXT: s_and_b32 s2, s2, 0xffff +; CI-NEXT: s_or_b32 s1, s1, s2 +; CI-NEXT: s_bitset1_b32 s1, 31 +; CI-NEXT: v_mov_b32_e32 v4, s0 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: flat_store_dword v[0:1], v5 -; CI-NEXT: flat_store_dword v[2:3], v4 +; CI-NEXT: flat_store_dword v[0:1], v4 +; CI-NEXT: v_mov_b32_e32 v0, s1 +; CI-NEXT: flat_store_dword v[2:3], v0 ; CI-NEXT: s_endpgm ; ; VI-LABEL: s_fneg_multi_use_fabs_v2bf16: @@ -1086,5 +1029,3 @@ declare <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat>) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CIVI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll b/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll index d232693..98044a7 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll @@ -14,11 +14,10 @@ define amdgpu_kernel void @s_fneg_bf16(ptr addrspace(1) %out, bfloat %in) #0 { ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s2 -; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; CI-NEXT: s_xor_b32 s2, s2, 0x8000 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_short v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -93,9 +92,7 @@ define amdgpu_kernel void @v_fneg_bf16(ptr addrspace(1) %out, ptr addrspace(1) % ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_ushort v2, v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_mul_f32_e32 v2, -1.0, v2 -; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_xor_b32_e32 v2, 0x8000, v2 ; CI-NEXT: flat_store_short v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -170,11 +167,10 @@ define amdgpu_kernel void @s_fneg_free_bf16(ptr addrspace(1) %out, i16 %in) #0 { ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s2 -; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; CI-NEXT: s_xor_b32 s2, s2, 0x8000 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_short v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -248,9 +244,9 @@ define amdgpu_kernel void @v_fneg_fold_bf16(ptr addrspace(1) %out, ptr addrspace ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_xor_b32_e32 v3, 0x8000, v2 ; 
CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_mul_f32_e32 v3, -1.0, v2 -; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v3 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_mul_f32_e32 v2, v3, v2 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 ; CI-NEXT: flat_store_short v[0:1], v2 @@ -365,13 +361,13 @@ define amdgpu_kernel void @s_fneg_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_and_b32 s3, s2, 0xffff0000 -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s3 -; CI-NEXT: v_mul_f32_e64 v1, -1.0, s2 -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; CI-NEXT: s_xor_b32 s2, s2, 0x8000 +; CI-NEXT: s_and_b32 s2, s2, 0xffff +; CI-NEXT: s_or_b32 s2, s2, s3 +; CI-NEXT: s_add_i32 s2, s2, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -426,16 +422,16 @@ define amdgpu_kernel void @s_fneg_v2bf16_nonload(ptr addrspace(1) %out) #0 { ; CI-NEXT: ; def s2 ; CI-NEXT: ;;#ASMEND ; CI-NEXT: s_and_b32 s3, s2, 0xffff0000 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s3 -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_mul_f32_e64 v1, -1.0, s2 -; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; CI-NEXT: s_xor_b32 s2, s2, 0x8000 +; CI-NEXT: s_and_b32 s2, s2, 0xffff +; CI-NEXT: s_or_b32 s2, s2, s3 +; CI-NEXT: s_add_i32 s2, s2, 0x80000000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -501,13 +497,11 @@ define amdgpu_kernel void @v_fneg_v2bf16(ptr addrspace(1) %out, ptr addrspace(1) ; CI-NEXT: v_add_i32_e32 v0, vcc, s0, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: 
flat_load_dword v2, v[0:1] +; CI-NEXT: s_mov_b32 s0, 0xffff ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v2 -; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_mul_f32_e32 v3, -1.0, v3 -; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; CI-NEXT: v_mul_f32_e32 v2, -1.0, v2 -; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16 +; CI-NEXT: v_xor_b32_e32 v3, 0x8000, v2 +; CI-NEXT: v_bfi_b32 v2, s0, v3, v2 +; CI-NEXT: v_add_i32_e32 v2, vcc, 0x80000000, v2 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -570,13 +564,13 @@ define amdgpu_kernel void @fneg_free_v2bf16(ptr addrspace(1) %out, i32 %in) #0 { ; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_and_b32 s3, s2, 0xffff0000 -; CI-NEXT: s_lshl_b32 s2, s2, 16 -; CI-NEXT: v_mul_f32_e64 v0, -1.0, s3 -; CI-NEXT: v_mul_f32_e64 v1, -1.0, s2 -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; CI-NEXT: s_xor_b32 s2, s2, 0x8000 +; CI-NEXT: s_and_b32 s2, s2, 0xffff +; CI-NEXT: s_or_b32 s2, s2, s3 +; CI-NEXT: s_add_i32 s2, s2, 0x80000000 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -637,16 +631,14 @@ define amdgpu_kernel void @v_fneg_fold_v2bf16(ptr addrspace(1) %out, ptr addrspa ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v2 -; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_mul_f32_e32 v4, -1.0, v3 -; CI-NEXT: v_mul_f32_e32 v5, -1.0, v2 -; CI-NEXT: v_and_b32_e32 v4, 0xffff0000, v4 -; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v5 -; CI-NEXT: v_mul_f32_e32 v3, v4, v3 -; CI-NEXT: v_mul_f32_e32 v2, v5, v2 -; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16 +; CI-NEXT: v_xor_b32_e32 v3, 0x8000, v2 +; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; CI-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; CI-NEXT: 
v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_mul_f32_e64 v2, -v2, v2 +; CI-NEXT: v_mul_f32_e32 v3, v3, v4 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_alignbit_b32 v2, v2, v3, 16 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm ; @@ -912,12 +904,9 @@ define amdgpu_kernel void @v_extract_fneg_no_fold_v2bf16(ptr addrspace(1) %in) # ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: flat_load_dword v0, v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 -; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_mul_f32_e32 v1, -1.0, v1 -; CI-NEXT: v_mul_f32_e32 v0, -1.0, v0 -; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; CI-NEXT: v_xor_b32_e32 v0, 0x8000, v0 +; CI-NEXT: v_xor_b32_e32 v1, 0x8000, v1 ; CI-NEXT: flat_store_short v[0:1], v0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: flat_store_short v[0:1], v1 diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 800ee87..8230e47 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -1572,26 +1572,11 @@ define void @test_fma(ptr %p, ptr %q, ptr %r) #0 { } define void @test_fabs(ptr %p) { -; CHECK-FP16-LABEL: test_fabs: -; CHECK-FP16: ldrh r1, [r0] -; CHECK-FP16-NEXT: vmov s0, r1 -; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-FP16-NEXT: vabs.f32 s0, s0 -; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-FP16-NEXT: vmov r1, s0 -; CHECK-FP16-NEXT: strh r1, [r0] -; CHECK-FP16-NEXT: bx lr -; -; CHECK-LIBCALL-LABEL: test_fabs: -; CHECK-LIBCALL: .save {r4, lr} -; CHECK-LIBCALL-NEXT: push {r4, lr} -; CHECK-LIBCALL-NEXT: mov r4, r0 -; CHECK-LIBCALL-NEXT: ldrh r0, [r0] -; CHECK-LIBCALL-NEXT: bl __aeabi_h2f -; CHECK-LIBCALL-NEXT: bic r0, r0, #-2147483648 -; CHECK-LIBCALL-NEXT: bl __aeabi_f2h -; CHECK-LIBCALL-NEXT: strh r0, [r4] -; CHECK-LIBCALL-NEXT: pop {r4, pc} +; CHECK-ALL-LABEL: test_fabs: +; CHECK-ALL: ldrh r1, [r0] +; 
CHECK-ALL-NEXT: bfc r1, #15, #17 +; CHECK-ALL-NEXT: strh r1, [r0] +; CHECK-ALL-NEXT: bx lr %a = load half, ptr %p, align 2 %r = call half @llvm.fabs.f16(half %a) store half %r, ptr %p @@ -2454,26 +2439,11 @@ define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { } define void @test_fneg(ptr %p1, ptr %p2) #0 { -; CHECK-FP16-LABEL: test_fneg: -; CHECK-FP16: ldrh r0, [r0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-FP16-NEXT: vneg.f32 s0, s0 -; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-FP16-NEXT: vmov r0, s0 -; CHECK-FP16-NEXT: strh r0, [r1] -; CHECK-FP16-NEXT: bx lr -; -; CHECK-LIBCALL-LABEL: test_fneg: -; CHECK-LIBCALL: .save {r4, lr} -; CHECK-LIBCALL-NEXT: push {r4, lr} -; CHECK-LIBCALL-NEXT: ldrh r0, [r0] -; CHECK-LIBCALL-NEXT: mov r4, r1 -; CHECK-LIBCALL-NEXT: bl __aeabi_h2f -; CHECK-LIBCALL-NEXT: eor r0, r0, #-2147483648 -; CHECK-LIBCALL-NEXT: bl __aeabi_f2h -; CHECK-LIBCALL-NEXT: strh r0, [r4] -; CHECK-LIBCALL-NEXT: pop {r4, pc} +; CHECK-ALL-LABEL: test_fneg: +; CHECK-ALL: ldrh r0, [r0] +; CHECK-ALL-NEXT: eor r0, r0, #32768 +; CHECK-ALL-NEXT: strh r0, [r1] +; CHECK-ALL-NEXT: bx lr %v = load half, ptr %p1, align 2 %res = fneg half %v store half %res, ptr %p2, align 2 diff --git a/llvm/test/CodeGen/Generic/bfloat-op.ll b/llvm/test/CodeGen/Generic/bfloat-op.ll new file mode 100644 index 0000000..d593328 --- /dev/null +++ b/llvm/test/CodeGen/Generic/bfloat-op.ll @@ -0,0 +1,104 @@ +; Same as `bfloat.ll`, but for `fneg`, `fabs`, `copysign` and `fma`. +; Can be merged back into `bfloat.ll` once they have the same platform coverage. +; Once all targets are fixed, the `CHECK-*` prefixes should all be merged into a single `CHECK` prefix and the `BAD-*` prefixes should be removed. 
+ +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if avr-registered-target %{ llc %s -o - -mtriple=avr-none | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; FIXME: BPF has a compiler error +; RUN: %if csky-registered-target %{ llc %s -o - -mtriple=csky-unknown-linux-gnuabiv2 | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; FIXME: hard float csky crashes +; FIXME: directx has a compiler error +; FIXME: hexagon crashes +; RUN: %if lanai-registered-target %{ llc %s -o - -mtriple=lanai-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu | FileCheck %s 
--check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu -mattr=+f | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if m68k-registered-target %{ llc %s -o - -mtriple=m68k-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; FIXME: mips crashes +; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %} +; FIXME: powerpc crashes +; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; FIXME: sparc crashes +; FIXME: spirv crashes +; FIXME: s390x crashes +; FIXME: ve crashes +; FIXME: wasm crashes +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,BAD-COPYSIGN,CHECK-FMA %} + +; Note that arm64ec labels are quoted, hence the `{{"?}}:`. + +; Codegen tests don't work the same for graphics targets. Add a dummy directive +; for filecheck, just make sure we don't crash. 
+; NOCRASH: {{.*}} + +; fneg, fabs and copysign all need to not quieten signalling NaNs, so should not call any conversion functions which do. +; These tests won't catch cases where the everything is done using native instructions instead of builtins. + +define void @test_fneg(ptr %p1, ptr %p2) #0 { +; ALL-LABEL: test_fneg{{"?}}: +; ALL-NEG-NOT: __extend +; ALL-NEG-NOT: __trunc +; ALL-NEG-NOT: __gnu +; ALL-NEG-NOT: __aeabi + %v = load bfloat, ptr %p1 + %res = fneg bfloat %v + store bfloat %res, ptr %p2 + ret void +} + +define void @test_fabs(ptr %p1, ptr %p2) { +; ALL-LABEL: test_fabs{{"?}}: +; ALL-ABS-NOT: __extend +; ALL-ABS-NOT: __trunc +; ALL-ABS-NOT: __gnu +; ALL-ABS-NOT: __aeabi + %a = load bfloat, ptr %p1 + %r = call bfloat @llvm.fabs.f16(bfloat %a) + store bfloat %r, ptr %p2 + ret void +} + +define void @test_copysign(ptr %p1, ptr %p2, ptr %p3) { +; ALL-LABEL: test_copysign{{"?}}: +; CHECK-COPYSIGN-NOT: __extend +; CHECK-COPYSIGN-NOT: __trunc +; CHECK-COPYSIGN-NOT: __gnu +; CHECK-COPYSIGN-NOT: __aeabi +; BAD-COPYSIGN: __truncsfbf2 + %a = load bfloat, ptr %p1 + %b = load bfloat, ptr %p2 + %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %b) + store bfloat %r, ptr %p3 + ret void +} + +; There is no floating-point type LLVM supports that is large enough to promote bfloat FMA to +; without causing double rounding issues. This checks for libcalls to f32/f64 fma and truncating +; f32/f64 to bf16. 
See https://github.com/llvm/llvm-project/issues/131531 + +define void @test_fma(ptr %p1, ptr %p2, ptr %p3, ptr %p4) { +; ALL-LABEL: test_fma{{"?}}: +; CHECK-FMA-NOT: {{\bfmaf?\b}} +; CHECK-FMA-NOT: __truncsfbf2 +; CHECK-FMA-NOT: __truncdfbf2 +; BAD-FMA: {{__truncsfbf2|\bfmaf?\b}} + %a = load bfloat, ptr %p1 + %b = load bfloat, ptr %p2 + %c = load bfloat, ptr %p3 + %r = call bfloat @llvm.fma.f16(bfloat %a, bfloat %b, bfloat %c) + store bfloat %r, ptr %p4 + ret void +} diff --git a/llvm/test/CodeGen/Generic/bfloat.ll b/llvm/test/CodeGen/Generic/bfloat.ll new file mode 100644 index 0000000..83c6711 --- /dev/null +++ b/llvm/test/CodeGen/Generic/bfloat.ll @@ -0,0 +1,75 @@ +; Simple cross-platform smoke checks for basic bf16 operations. +; +; There shouldn't be any architectures that crash when trying to use `bfloat`; +; check that here. Additionally do a small handful of smoke tests that work +; well cross-platform. + +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; FIXME: arm64ec crashes when passing/returning bfloat +; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if avr-registered-target %{ llc %s -o - -mtriple=avr-none | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if 
bpf-registered-target %{ llc %s -o - -mtriple=bpfel | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if csky-registered-target %{ llc %s -o - -mtriple=csky-unknown-linux-gnuabiv2 | FileCheck %s --check-prefixes=ALL,CHECK %} +; FIXME: hard float csky crashes +; RUN: %if directx-registered-target %{ llc %s -o - -mtriple=dxil-pc-shadermodel6.3-library | FileCheck %s --check-prefixes=NOCRASH %} +; FIXME: hexagon crashes +; RUN: %if lanai-registered-target %{ llc %s -o - -mtriple=lanai-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu -mattr=+f | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if m68k-registered-target %{ llc %s -o - -mtriple=m68k-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; FIXME: mips crashes +; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %} +; FIXME: powerpc crashes +; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; FIXME: sparc crashes +; FIXME: spirv crashes +; FIXME: s390x crashes +; FIXME: ve crashes +; FIXME: wasm crashes +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s 
--check-prefixes=ALL,CHECK %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %} +; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %} + +; Note that arm64ec labels are quoted, hence the `{{"?}}:`. + +; Codegen tests don't work the same for graphics targets. Add a dummy directive +; for filecheck, just make sure we don't crash. +; NOCRASH: {{.*}} + +; All backends need to be able to bitcast without converting to another format, +; so we assert against libcalls (specifically __truncsfbf2). This won't catch hardware conversions. + +define bfloat @from_bits(i16 %bits) nounwind { +; ALL-LABEL: from_bits{{"?}}: +; ALL-NOT: __extend +; ALL-NOT: __trunc +; ALL-NOT: __gnu + %f = bitcast i16 %bits to bfloat + ret bfloat %f +} + +define i16 @to_bits(bfloat %f) nounwind { +; ALL-LABEL: to_bits{{"?}}: +; CHECK-NOT: __extend +; CHECK-NOT: __trunc +; CHECK-NOT: __gnu +; BAD: __truncsfbf2 + %bits = bitcast bfloat %f to i16 + ret i16 %bits +} + +define bfloat @check_freeze(bfloat %f) nounwind { +; ALL-LABEL: check_freeze{{"?}}: + %t0 = freeze bfloat %f + ret bfloat %t0 +} diff --git a/llvm/test/CodeGen/Generic/half-op.ll b/llvm/test/CodeGen/Generic/half-op.ll new file mode 100644 index 0000000..1037d8e --- /dev/null +++ b/llvm/test/CodeGen/Generic/half-op.ll @@ -0,0 +1,115 @@ +; Same as `half.ll`, but for `fneg`, `fabs`, `copysign` and `fma`. +; Can be merged back into `half.ll` once BPF doesn't have a compiler error. +; Once all targets are fixed, the `CHECK-*` prefixes should all be merged into a single `CHECK` prefix and the `BAD-*` prefixes should be removed. 
+ +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} +; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if avr-registered-target %{ llc %s -o - -mtriple=avr-none | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; FIXME: BPF has a compiler error +; RUN: %if csky-registered-target %{ llc %s -o - -mtriple=csky-unknown-linux-gnuabiv2 | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if csky-registered-target %{ llc %s -o - -mtriple=csky-unknown-linux-gnuabiv2 -mcpu=ck860fv -mattr=+hard-float | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; FIXME: directx has a compiler error +; RUN: %if hexagon-registered-target %{ llc %s -o - -mtriple=hexagon-unknown-linux-musl | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} 
+; RUN: %if lanai-registered-target %{ llc %s -o - -mtriple=lanai-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu -mattr=+f | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if m68k-registered-target %{ llc %s -o - -mtriple=m68k-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips64-unknown-linux-gnuabi64 | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips64el-unknown-linux-gnuabi64 | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %} +; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu 
| FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if spirv-registered-target %{ llc %s -o - -mtriple=spirv-unknown-unknown | FileCheck %s --check-prefixes=NOCRASH %} +; RUN: %if systemz-registered-target %{ llc %s -o - -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if ve-registered-target %{ llc %s -o - -mtriple=ve-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %} +; RUN: %if xcore-registered-target %{ llc %s -o 
- -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,CHECK-FMA %} + +; Note that arm64ec labels are quoted, hence the `{{"?}}:`. + +; Codegen tests don't work the same for graphics targets. Add a dummy directive +; for filecheck, just make sure we don't crash. +; NOCRASH: {{.*}} + +; fneg, fabs and copysign all need to not quieten signalling NaNs, so should not call any conversion functions which do. +; These tests won't catch cases where everything is done using native instructions instead of builtins. +; See https://github.com/llvm/llvm-project/issues/104915 + +define void @test_fneg(ptr %p1, ptr %p2) #0 { +; ALL-LABEL: test_fneg{{"?}}: +; CHECK-NEG-ABS-NOT: __extend +; CHECK-NEG-ABS-NOT: __trunc +; CHECK-NEG-ABS-NOT: __gnu +; CHECK-NEG-ABS-NOT: __aeabi +; BAD-NEG-ABS: {{__extendhfsf2|__gnu_h2f_ieee|__aeabi_h2f}} + %v = load half, ptr %p1 + %res = fneg half %v + store half %res, ptr %p2 + ret void +} + +define void @test_fabs(ptr %p1, ptr %p2) { +; ALL-LABEL: test_fabs{{"?}}: +; CHECK-NEG-ABS-NOT: __extend +; CHECK-NEG-ABS-NOT: __trunc +; CHECK-NEG-ABS-NOT: __gnu +; CHECK-NEG-ABS-NOT: __aeabi +; BAD-NEG-ABS: {{__extendhfsf2|__gnu_h2f_ieee|__aeabi_h2f}} + %a = load half, ptr %p1 + %r = call half @llvm.fabs.f16(half %a) + store half %r, ptr %p2 + ret void +} + +define void @test_copysign(ptr %p1, ptr %p2, ptr %p3) { +; ALL-LABEL: test_copysign{{"?}}: +; CHECK-COPYSIGN-NOT: __extend +; CHECK-COPYSIGN-NOT: __trunc +; CHECK-COPYSIGN-NOT: __gnu +; CHECK-COPYSIGN-NOT: __aeabi +; BAD-COPYSIGN: {{__extendhfsf2|__gnu_h2f_ieee}} + %a = load half, ptr %p1 + %b = load half, ptr %p2 + %r = call half @llvm.copysign.f16(half %a, half %b) + store half %r, ptr %p3 + ret void +} + +; If promoting, fma must promote at least to f64 to avoid double rounding issues. 
+; This checks for calls to f32 fmaf and truncating f32 to f16. +; See https://github.com/llvm/llvm-project/issues/98389 + +define void @test_fma(ptr %p1, ptr %p2, ptr %p3, ptr %p4) { +; ALL-LABEL: test_fma{{"?}}: +; Allow fmaf16 +; CHECK-FMA-NOT: fmaf{{\b}} +; CHECK-FMA-NOT: __truncsfhf2 +; CHECK-FMA-NOT: __gnu_f2h_ieee +; CHECK-FMA-NOT: __aeabi_f2h +; BAD-FMA: {{__truncsfhf2|__gnu_f2h_ieee|__aeabi_f2h|fmaf\b}} + %a = load half, ptr %p1 + %b = load half, ptr %p2 + %c = load half, ptr %p3 + %r = call half @llvm.fma.f16(half %a, half %b, half %c) + store half %r, ptr %p4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll index 39f9269..4998d87 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp.ll @@ -6,12 +6,10 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado ; CHECK: # %bb.0: ; CHECK-NEXT: ldbrx 3, 0, 3 ; CHECK-NEXT: ldbrx 4, 0, 4 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8) @@ -23,11 +21,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado ; CHECK: # %bb.0: ; CHECK-NEXT: lwbrx 3, 0, 3 ; CHECK-NEXT: lwbrx 4, 0, 4 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) ret i32 %call diff --git a/llvm/test/CodeGen/PowerPC/ucmp.ll b/llvm/test/CodeGen/PowerPC/ucmp.ll 
index d2dff6e..4d393dd 100644 --- a/llvm/test/CodeGen/PowerPC/ucmp.ll +++ b/llvm/test/CodeGen/PowerPC/ucmp.ll @@ -4,12 +4,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 @@ -18,12 +16,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_16: ; CHECK: # %bb.0: -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 @@ -32,14 +28,10 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_8_32: ; CHECK: # %bb.0: -; CHECK-NEXT: clrldi 5, 4, 32 -; CHECK-NEXT: clrldi 6, 3, 32 -; CHECK-NEXT: sub 5, 5, 6 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 @@ -48,12 +40,10 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_8_64: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; 
CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i64 %x, i64 %y) ret i8 %1 @@ -82,14 +72,10 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_32_32: ; CHECK: # %bb.0: -; CHECK-NEXT: clrldi 5, 4, 32 -; CHECK-NEXT: clrldi 6, 3, 32 -; CHECK-NEXT: sub 5, 5, 6 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 @@ -98,12 +84,10 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_32_64: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i32 @llvm.ucmp(i64 %x, i64 %y) ret i32 %1 @@ -112,12 +96,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_64_64: ; CHECK: # %bb.0: -; CHECK-NEXT: subc 5, 4, 3 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: subfe 5, 4, 4 -; CHECK-NEXT: neg 5, 5 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i64 @llvm.ucmp(i64 %x, i64 %y) ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index 2ebb6e9..d089e36 100644 --- 
a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -514,6 +514,7 @@ define i32 @fneg_h(half %a, half %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a1, 16 ; RV32I-NEXT: addi s1, a1, -1 ; RV32I-NEXT: and a0, a0, s1 @@ -521,13 +522,12 @@ define i32 @fneg_h(half %a, half %b) nounwind { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s2, a0, a1 ; RV32I-NEXT: and a0, a0, s1 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a0, s0, a0 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, s2, s1 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s0 @@ -536,6 +536,7 @@ define i32 @fneg_h(half %a, half %b) nounwind { ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -545,6 +546,7 @@ define i32 @fneg_h(half %a, half %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a1, 16 ; RV64I-NEXT: addi s1, a1, -1 ; RV64I-NEXT: and a0, a0, s1 @@ -552,13 +554,12 @@ define i32 @fneg_h(half %a, half %b) nounwind { ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s2, a0, a1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lui a0, 524288 -; RV64I-NEXT: xor a0, s0, a0 -; 
RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, s2, s1 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s0 @@ -567,6 +568,7 @@ define i32 @fneg_h(half %a, half %b) nounwind { ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; @@ -638,11 +640,7 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: lui a1, 1048568 ; RV32I-NEXT: slli s1, s1, 17 ; RV32I-NEXT: and a0, a0, a1 @@ -677,11 +675,7 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: lui a1, 1048568 ; RV64I-NEXT: slli s1, s1, 49 ; RV64I-NEXT: and a0, a0, a1 @@ -804,15 +798,14 @@ define half @fabs_h(half %a, half %b) nounwind { ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: slli s0, a0, 17 +; RV32I-NEXT: srli s0, s0, 17 ; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 ; RV32I-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload @@ -841,15 +834,14 @@ define half @fabs_h(half %a, half %b) nounwind { ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: slli s0, a0, 49 +; RV64I-NEXT: srli s0, s0, 49 ; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: slli a0, a0, 33 -; RV64I-NEXT: srli a0, a0, 33 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -1217,25 +1209,21 @@ define half @fmsub_h(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s3, a0, -1 -; RV32I-NEXT: and a0, a2, s3 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and a0, a2, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s3, a0, a1 +; RV32I-NEXT: and a0, s1, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: and a0, s3, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a0, s1 @@ -1261,25 +1249,21 @@ define half @fmsub_h(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addi s3, a0, -1 -; RV64I-NEXT: and a0, a2, s3 +; RV64I-NEXT: addi s2, a0, -1 +; RV64I-NEXT: and a0, a2, s2 ; RV64I-NEXT: call 
__extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s3, a0, a1 +; RV64I-NEXT: and a0, s1, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: and a0, s3, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: mv a0, s1 @@ -1355,43 +1339,34 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui s3, 16 -; RV32I-NEXT: addi s3, s3, -1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi s3, a1, -1 ; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s3 -; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: lui s4, 524288 -; RV32I-NEXT: xor a0, a0, s4 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s2, s2, a1 +; RV32I-NEXT: xor s4, a0, a1 ; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: xor a0, a0, s4 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s0, s3 -; RV32I-NEXT: call 
__extendhfsf2 ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s4, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call fmaf ; RV32I-NEXT: call __truncsfhf2 @@ -1413,43 +1388,34 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s1, a2 -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui s3, 16 -; RV64I-NEXT: addi s3, s3, -1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addi s3, a1, -1 ; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 ; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui s4, 524288 -; RV64I-NEXT: xor a0, a0, s4 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s2, s2, a1 +; RV64I-NEXT: xor s4, a0, a1 ; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: xor a0, a0, s4 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s0, s3 -; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s4, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; 
RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fmaf ; RV64I-NEXT: call __truncsfhf2 @@ -1535,44 +1501,35 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a2 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui s3, 16 -; RV32I-NEXT: addi s3, s3, -1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi s3, a0, -1 ; RV32I-NEXT: and a0, a1, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s3 -; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: lui s4, 524288 -; RV32I-NEXT: xor a0, a0, s4 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s2, s2, a1 +; RV32I-NEXT: xor s4, a0, a1 ; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: xor a0, a0, s4 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s0, s3 -; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s4, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf ; RV32I-NEXT: call __truncsfhf2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1593,44 +1550,35 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; 
RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s1, a2 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lui s3, 16 -; RV64I-NEXT: addi s3, s3, -1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addi s3, a0, -1 ; RV64I-NEXT: and a0, a1, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 ; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui s4, 524288 -; RV64I-NEXT: xor a0, a0, s4 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s2, s2, a1 +; RV64I-NEXT: xor s4, a0, a1 ; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: xor a0, a0, s4 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s0, s3 -; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s4, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf ; RV64I-NEXT: call __truncsfhf2 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1960,25 +1908,21 @@ define half @fnmsub_h(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s3, a1, -1 -; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: call 
__extendhfsf2 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s3, a0, a1 +; RV32I-NEXT: and a0, s1, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: and a0, s3, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s0 @@ -2003,25 +1947,21 @@ define half @fnmsub_h(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addi s3, a1, -1 -; RV64I-NEXT: and a0, a0, s3 +; RV64I-NEXT: addi s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s3, a0, a1 +; RV64I-NEXT: and a0, s1, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: and a0, s3, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: mv a2, s0 @@ -2096,25 +2036,21 @@ define half @fnmsub_h_2(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s3, a0, -1 -; RV32I-NEXT: and a0, a1, s3 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s3 -; 
RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s3 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s3, a0, a1 +; RV32I-NEXT: and a0, s1, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: and a0, s3, s2 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -2140,25 +2076,21 @@ define half @fnmsub_h_2(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addi s3, a0, -1 -; RV64I-NEXT: and a0, a1, s3 +; RV64I-NEXT: addi s2, a0, -1 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s3 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s3, a0, a1 +; RV64I-NEXT: and a0, s1, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: and a0, s3, s2 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -2519,12 +2451,8 @@ define half @fnmadd_h_contract(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3 ; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: xor s1, a0, 
a1 ; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: mv s0, a0 @@ -2580,12 +2508,8 @@ define half @fnmadd_h_contract(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3 ; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: xor s1, a0, a1 ; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: mv s0, a0 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/RWStructuredBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/RWStructuredBufferNonUniformIdx.ll new file mode 100644 index 0000000..2a12baf --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/RWStructuredBufferNonUniformIdx.ll @@ -0,0 +1,26 @@ +; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Shader +; CHECK-DAG: OpCapability ShaderNonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate %[[#access1:]] NonUniformEXT +@ReadWriteStructuredBuf.str = private unnamed_addr constant [23 x i8] c"ReadWriteStructuredBuf\00", align 1 + +define void @main() local_unnamed_addr #0 { +entry: + %0 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0) + %add.i = add i32 %0, 1 + %1 = tail call noundef i32 @llvm.spv.resource.nonuniformindex(i32 %add.i) + %2 = tail call target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_a0v4i32_12_1t(i32 0, i32 0, i32 
64, i32 %1, ptr nonnull @ReadWriteStructuredBuf.str) + %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4i32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) %2, i32 98) + %4 = load <4 x i32>, ptr addrspace(11) %3, align 16 + %vecins.i = insertelement <4 x i32> %4, i32 99, i64 0 +; CHECK: %[[#access1]] = OpAccessChain {{.*}} +; CHECK: OpStore %[[#access1]] {{%[0-9]+}} Aligned 16 + store <4 x i32> %vecins.i, ptr addrspace(11) %3, align 16 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/StructuredBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/StructuredBufferNonUniformIdx.ll new file mode 100644 index 0000000..92efad9 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/StructuredBufferNonUniformIdx.ll @@ -0,0 +1,24 @@ +; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Shader +; CHECK-DAG: OpCapability ShaderNonUniformEXT +; CHECK-DAG: OpCapability StorageTexelBufferArrayNonUniformIndexingEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate %[[#access:]] NonUniformEXT +; CHECK-DAG: OpDecorate %[[#load:]] NonUniformEXT +@ReadWriteBuf.str = private unnamed_addr constant [13 x i8] c"ReadWriteBuf\00", align 1 + +define void @main() local_unnamed_addr #0 { +entry: + %0 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0) + %1 = tail call noundef i32 @llvm.spv.resource.nonuniformindex(i32 %0) + %2 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 64, i32 %1, ptr nonnull @ReadWriteBuf.str) + %3 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) 
@llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %2, i32 96) +; CHECK: {{%[0-9]+}} = OpCompositeExtract {{.*}} +; CHECK: %[[#access]] = OpAccessChain {{.*}} +; CHECK: %[[#load]] = OpLoad {{%[0-9]+}} %[[#access]] +; CHECK: OpImageWrite %[[#load]] {{%[0-9]+}} {{%[0-9]+}} + store i32 95, ptr addrspace(11) %3, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll deleted file mode 100644 index 5e15aab..0000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; This test depends on llvm.svp.resource.nonuniformindex support (not yet implemented) -; https://github.com/llvm/llvm-project/issues/160231 -; XFAIL: * - -@.str.b0 = private unnamed_addr constant [3 x i8] c"B0\00", align 1 - -; CHECK-DAG: OpCapability Shader -; CHECK-DAG: OpCapability ShaderNonUniformEXT -; CHECK-DAG: OpCapability StorageImageArrayNonUniformIndexing -; CHECK-DAG: OpCapability Image1D -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; 
CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0{{$}} -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] - %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 0, ptr nonnull @.str.b0) - %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer0, i32 0) - store i32 0, ptr %ptr0, align 4 - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] - %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 1, ptr nonnull @.str.b0) - %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer1, i32 0) - store i32 0, ptr %ptr1, align 4 - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/Thumb2/mve-vabd.ll b/llvm/test/CodeGen/Thumb2/mve-vabd.ll index 8d52fe5..3c35a29 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vabd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vabd.ll @@ -63,34 +63,30 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x 
half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-MVE-NEXT: mov r4, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q1[0] +; CHECK-MVE-NEXT: vmov.u16 r0, q1[1] ; CHECK-MVE-NEXT: vmov q5, q1 ; CHECK-MVE-NEXT: vmov q4, q0 ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r5, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q4[0] +; CHECK-MVE-NEXT: vmov.u16 r0, q4[1] ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: mov r5, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q5[1] +; CHECK-MVE-NEXT: vmov.u16 r0, q5[0] ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r6, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q4[1] +; CHECK-MVE-NEXT: vmov.u16 r0, q4[0] ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r6 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: vmov.16 q6[0], r5 -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: vmov.16 q6[1], r0 +; CHECK-MVE-NEXT: bfc r0, #15, #17 +; CHECK-MVE-NEXT: bfc r5, #15, #17 +; CHECK-MVE-NEXT: vmov.16 q6[0], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[2] +; CHECK-MVE-NEXT: vmov.16 q6[1], r5 ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r5, r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q4[2] @@ -98,9 +94,7 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bfc r0, #15, #17 ; CHECK-MVE-NEXT: vmov.16 q6[2], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[3] ; CHECK-MVE-NEXT: bl __aeabi_h2f @@ -110,9 +104,7 @@ 
define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bfc r0, #15, #17 ; CHECK-MVE-NEXT: vmov.16 q6[3], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[4] ; CHECK-MVE-NEXT: bl __aeabi_h2f @@ -122,9 +114,7 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bfc r0, #15, #17 ; CHECK-MVE-NEXT: vmov.16 q6[4], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[5] ; CHECK-MVE-NEXT: bl __aeabi_h2f @@ -134,9 +124,7 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bfc r0, #15, #17 ; CHECK-MVE-NEXT: vmov.16 q6[5], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[6] ; CHECK-MVE-NEXT: bl __aeabi_h2f @@ -146,9 +134,7 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl __aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bfc r0, #15, #17 ; CHECK-MVE-NEXT: vmov.16 q6[6], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[7] ; CHECK-MVE-NEXT: bl __aeabi_h2f @@ -158,9 +144,7 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: bl 
__aeabi_h2f -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 -; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bfc r0, #15, #17 ; CHECK-MVE-NEXT: vmov.16 q6[7], r0 ; CHECK-MVE-NEXT: vstrw.32 q6, [r4] ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11, d12, d13} diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll new file mode 100644 index 0000000..a0c243b --- /dev/null +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -0,0 +1,43 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;; A minimal test case. llc will crash if global variables already has a section +;; prefix. Subsequent PRs will expand on this test case to test the hotness +;; reconciliation implementation. + +; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +; RUN: -partition-static-data-sections=true \ +; RUN: -data-sections=true -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR + +; ERR: Global variable hot_bss already has a section prefix hot + +@hot_bss = internal global i32 0, !section_prefix !17 + +define void @hot_func() !prof !14 { + %9 = load i32, ptr @hot_bss + %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9) + ret void +} + +declare i32 @func_taking_arbitrary_param(...) 
+ +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999} +!17 = !{!"section_prefix", !"hot"} diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll index 211a7bc..e2b6167 100644 --- a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll +++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll @@ -4,6 +4,11 @@ @x = global i32 0, align 4, !dbg !0 +; Function Attrs: mustprogress noinline nounwind optnone ssp uwtable(sync) +define void @_Z4funcv() !dbg !8 { + ret void, !dbg !11 +} + !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!6, !7} @@ -15,3 +20,7 @@ !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !6 = !{i32 7, !"Dwarf Version", i32 5} !7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", scope: !3, file: !3, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 14, scope: !8) diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll index 5929c15..84c7df1 100644 --- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll +++ 
b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll @@ -1,152 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s ; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and -; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this +; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this ; functionality, please move this test to a target which still is. define float @float_load_expand(ptr %ptr) { -; CHECK-LABEL: @float_load_expand -; CHECK: %1 = load atomic i32, ptr %ptr unordered, align 4 -; CHECK: %2 = bitcast i32 %1 to float -; CHECK: ret float %2 +; CHECK-LABEL: define float @float_load_expand( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; %res = load atomic float, ptr %ptr unordered, align 4 ret float %res } define float @float_load_expand_seq_cst(ptr %ptr) { -; CHECK-LABEL: @float_load_expand_seq_cst -; CHECK: %1 = load atomic i32, ptr %ptr seq_cst, align 4 -; CHECK: %2 = bitcast i32 %1 to float -; CHECK: ret float %2 +; CHECK-LABEL: define float @float_load_expand_seq_cst( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; %res = load atomic float, ptr %ptr seq_cst, align 4 ret float %res } define float @float_load_expand_vol(ptr %ptr) { -; CHECK-LABEL: @float_load_expand_vol -; CHECK: %1 = load atomic volatile i32, ptr %ptr unordered, align 4 -; CHECK: %2 = bitcast i32 %1 to float -; CHECK: ret float %2 +; CHECK-LABEL: define float @float_load_expand_vol( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[PTR]] 
unordered, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; %res = load atomic volatile float, ptr %ptr unordered, align 4 ret float %res } define float @float_load_expand_addr1(ptr addrspace(1) %ptr) { -; CHECK-LABEL: @float_load_expand_addr1 -; CHECK: %1 = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 -; CHECK: %2 = bitcast i32 %1 to float -; CHECK: ret float %2 +; CHECK-LABEL: define float @float_load_expand_addr1( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; %res = load atomic float, ptr addrspace(1) %ptr unordered, align 4 ret float %res } define void @float_store_expand(ptr %ptr, float %v) { -; CHECK-LABEL: @float_store_expand -; CHECK: %1 = bitcast float %v to i32 -; CHECK: store atomic i32 %1, ptr %ptr unordered, align 4 +; CHECK-LABEL: define void @float_store_expand( +; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] unordered, align 4 +; CHECK-NEXT: ret void +; store atomic float %v, ptr %ptr unordered, align 4 ret void } define void @float_store_expand_seq_cst(ptr %ptr, float %v) { -; CHECK-LABEL: @float_store_expand_seq_cst -; CHECK: %1 = bitcast float %v to i32 -; CHECK: store atomic i32 %1, ptr %ptr seq_cst, align 4 +; CHECK-LABEL: define void @float_store_expand_seq_cst( +; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; store atomic float %v, ptr %ptr seq_cst, align 4 ret void } define void @float_store_expand_vol(ptr %ptr, float %v) { -; CHECK-LABEL: @float_store_expand_vol -; CHECK: %1 = bitcast float %v to i32 -; CHECK: store 
atomic volatile i32 %1, ptr %ptr unordered, align 4 +; CHECK-LABEL: define void @float_store_expand_vol( +; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 +; CHECK-NEXT: store atomic volatile i32 [[TMP1]], ptr [[PTR]] unordered, align 4 +; CHECK-NEXT: ret void +; store atomic volatile float %v, ptr %ptr unordered, align 4 ret void } define void @float_store_expand_addr1(ptr addrspace(1) %ptr, float %v) { -; CHECK-LABEL: @float_store_expand_addr1 -; CHECK: %1 = bitcast float %v to i32 -; CHECK: store atomic i32 %1, ptr addrspace(1) %ptr unordered, align 4 +; CHECK-LABEL: define void @float_store_expand_addr1( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] unordered, align 4 +; CHECK-NEXT: ret void +; store atomic float %v, ptr addrspace(1) %ptr unordered, align 4 ret void } define void @pointer_cmpxchg_expand(ptr %ptr, ptr %v) { -; CHECK-LABEL: @pointer_cmpxchg_expand -; CHECK: %1 = ptrtoint ptr %v to i64 -; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 seq_cst monotonic -; CHECK: %3 = extractvalue { i64, i1 } %2, 0 -; CHECK: %4 = extractvalue { i64, i1 } %2, 1 -; CHECK: %5 = inttoptr i64 %3 to ptr -; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0 -; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1 +; CHECK-LABEL: define void @pointer_cmpxchg_expand( +; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst monotonic, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 
} [[TMP6]], i1 [[TMP4]], 1 +; CHECK-NEXT: ret void +; cmpxchg ptr %ptr, ptr null, ptr %v seq_cst monotonic ret void } define void @pointer_cmpxchg_expand2(ptr %ptr, ptr %v) { -; CHECK-LABEL: @pointer_cmpxchg_expand2 -; CHECK: %1 = ptrtoint ptr %v to i64 -; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 release monotonic -; CHECK: %3 = extractvalue { i64, i1 } %2, 0 -; CHECK: %4 = extractvalue { i64, i1 } %2, 1 -; CHECK: %5 = inttoptr i64 %3 to ptr -; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0 -; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1 +; CHECK-LABEL: define void @pointer_cmpxchg_expand2( +; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] release monotonic, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK-NEXT: ret void +; cmpxchg ptr %ptr, ptr null, ptr %v release monotonic ret void } define void @pointer_cmpxchg_expand3(ptr %ptr, ptr %v) { -; CHECK-LABEL: @pointer_cmpxchg_expand3 -; CHECK: %1 = ptrtoint ptr %v to i64 -; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 seq_cst seq_cst -; CHECK: %3 = extractvalue { i64, i1 } %2, 0 -; CHECK: %4 = extractvalue { i64, i1 } %2, 1 -; CHECK: %5 = inttoptr i64 %3 to ptr -; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0 -; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1 +; CHECK-LABEL: define void @pointer_cmpxchg_expand3( +; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, 
i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK-NEXT: ret void +; cmpxchg ptr %ptr, ptr null, ptr %v seq_cst seq_cst ret void } define void @pointer_cmpxchg_expand4(ptr %ptr, ptr %v) { -; CHECK-LABEL: @pointer_cmpxchg_expand4 -; CHECK: %1 = ptrtoint ptr %v to i64 -; CHECK: %2 = cmpxchg weak ptr %ptr, i64 0, i64 %1 seq_cst seq_cst -; CHECK: %3 = extractvalue { i64, i1 } %2, 0 -; CHECK: %4 = extractvalue { i64, i1 } %2, 1 -; CHECK: %5 = inttoptr i64 %3 to ptr -; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0 -; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1 +; CHECK-LABEL: define void @pointer_cmpxchg_expand4( +; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK-NEXT: ret void +; cmpxchg weak ptr %ptr, ptr null, ptr %v seq_cst seq_cst ret void } define void @pointer_cmpxchg_expand5(ptr %ptr, ptr %v) { -; CHECK-LABEL: @pointer_cmpxchg_expand5 -; CHECK: %1 = ptrtoint ptr %v to i64 -; CHECK: %2 = cmpxchg volatile ptr %ptr, i64 0, i64 %1 seq_cst seq_cst -; CHECK: %3 = extractvalue { i64, i1 } %2, 0 -; CHECK: %4 = extractvalue { i64, i1 } %2, 1 -; CHECK: %5 = inttoptr i64 %3 to ptr -; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0 -; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1 +; CHECK-LABEL: 
define void @pointer_cmpxchg_expand5( +; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK-NEXT: ret void +; cmpxchg volatile ptr %ptr, ptr null, ptr %v seq_cst seq_cst ret void } -define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, - ptr addrspace(2) %v) { -; CHECK-LABEL: @pointer_cmpxchg_expand6 -; CHECK: %1 = ptrtoint ptr addrspace(2) %v to i64 -; CHECK: %2 = cmpxchg ptr addrspace(1) %ptr, i64 0, i64 %1 seq_cst seq_cst -; CHECK: %3 = extractvalue { i64, i1 } %2, 0 -; CHECK: %4 = extractvalue { i64, i1 } %2, 1 -; CHECK: %5 = inttoptr i64 %3 to ptr addrspace(2) -; CHECK: %6 = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) %5, 0 -; CHECK: %7 = insertvalue { ptr addrspace(2), i1 } %6, i1 %4, 1 +define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, ptr addrspace(2) %v) { +; CHECK-LABEL: define void @pointer_cmpxchg_expand6( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2) +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr 
addrspace(2), i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK-NEXT: ret void +; cmpxchg ptr addrspace(1) %ptr, ptr addrspace(2) null, ptr addrspace(2) %v seq_cst seq_cst ret void } diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll index b946bbf..14ee00d 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -p indvars -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + declare void @foo() define void @narrow_iv_precondition_order_1(ptr %start, i32 %base, i8 %n) { @@ -96,3 +98,202 @@ loop: exit: ret void } + +define i32 @urem_order1(i32 %n) { +; CHECK-LABEL: define i32 @urem_order1( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[N]], 3 +; CHECK-NEXT: [[UREM_ZERO:%.*]] = icmp eq i32 [[UREM]], 0 +; CHECK-NEXT: br i1 [[UREM_ZERO]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N_NON_ZERO:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: br i1 [[N_NON_ZERO]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 3 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[PH]] ], [ 3, %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %urem = urem i32 %n, 3 + %urem.zero = icmp eq i32 
%urem, 0 + br i1 %urem.zero, label %ph, label %exit + +ph: + %n.non.zero = icmp ne i32 %n, 0 + br i1 %n.non.zero, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ] + call void @foo() + %iv.next = add i32 %iv, 3 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ 1, %entry ], [ 2, %ph ], [ 3, %loop ] + ret i32 %res +} + +define i32 @urem_order2(i32 %n) { +; CHECK-LABEL: define i32 @urem_order2( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[N_NON_ZERO:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: br i1 [[N_NON_ZERO]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[N]], 3 +; CHECK-NEXT: [[UREM_ZERO:%.*]] = icmp eq i32 [[UREM]], 0 +; CHECK-NEXT: br i1 [[UREM_ZERO]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 3 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[PH]] ], [ 3, %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %n.non.zero = icmp ne i32 %n, 0 + br i1 %n.non.zero, label %ph, label %exit + +ph: + %urem = urem i32 %n, 3 + %urem.zero = icmp eq i32 %urem, 0 + br i1 %urem.zero, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ] + call void @foo() + %iv.next = add i32 %iv, 3 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ 1, %entry ], [ 2, %ph ], [ 3, %loop ] + ret i32 %res +} + +define i64 @test_loop_with_div_order_1(i64 %n) { 
+; CHECK-LABEL: define i64 @test_loop_with_div_order_1( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[IS_ZERO]], label %[[EXIT:.*]], label %[[CHECK_BOUNDS:.*]] +; CHECK: [[CHECK_BOUNDS]]: +; CHECK-NEXT: [[N_PLUS_63:%.*]] = add i64 [[N]], 63 +; CHECK-NEXT: [[UPPER_BOUND:%.*]] = lshr i64 [[N_PLUS_63]], 6 +; CHECK-NEXT: [[BOUNDS_CHECK:%.*]] = icmp ult i64 [[N_PLUS_63]], 64 +; CHECK-NEXT: br i1 [[BOUNDS_CHECK]], label %[[EXIT]], label %[[CHECK_PARITY:.*]] +; CHECK: [[CHECK_PARITY]]: +; CHECK-NEXT: [[IS_ODD:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 +; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[UPPER_BOUND]], i64 1) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %is_zero = icmp eq i64 %n, 0 + br i1 %is_zero, label %exit, label %check_bounds + +check_bounds: + %n_plus_63 = add i64 %n, 63 + %upper_bound = lshr i64 %n_plus_63, 6 + %bounds_check = icmp ult i64 %n_plus_63, 64 + br i1 %bounds_check, label %exit, label %check_parity + +check_parity: + %is_odd = and i64 %n, 1 + %parity_check = icmp eq i64 %is_odd, 0 + br i1 %parity_check, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv_next, %loop ], [ 0, %check_parity ] + %dummy = load volatile i64, ptr null, align 8 + %iv_next = add i64 %iv, 1 + %exit_cond = icmp ult 
i64 %iv_next, %upper_bound + br i1 %exit_cond, label %loop, label %exit + +exit: + ret i64 0 +} + +define i64 @test_loop_with_div_order_2(i64 %n) { +; CHECK-LABEL: define i64 @test_loop_with_div_order_2( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[N_PLUS_63:%.*]] = add i64 [[N]], 63 +; CHECK-NEXT: [[UPPER_BOUND:%.*]] = lshr i64 [[N_PLUS_63]], 6 +; CHECK-NEXT: [[BOUNDS_CHECK:%.*]] = icmp ult i64 [[N_PLUS_63]], 64 +; CHECK-NEXT: br i1 [[BOUNDS_CHECK]], label %[[EXIT:.*]], label %[[CHECK_BOUNDS:.*]] +; CHECK: [[CHECK_BOUNDS]]: +; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[IS_ZERO]], label %[[EXIT]], label %[[CHECK_PARITY:.*]] +; CHECK: [[CHECK_PARITY]]: +; CHECK-NEXT: [[IS_ODD:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 +; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UPPER_BOUND]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %n_plus_63 = add i64 %n, 63 + %upper_bound = lshr i64 %n_plus_63, 6 + %bounds_check = icmp ult i64 %n_plus_63, 64 + br i1 %bounds_check, label %exit, label %check_bounds + +check_bounds: + %is_zero = icmp eq i64 %n, 0 + br i1 %is_zero, label %exit, label %check_parity + +check_parity: + %is_odd = and i64 %n, 1 + %parity_check = icmp eq i64 %is_odd, 0 + br i1 %parity_check, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv_next, %loop ], [ 0, %check_parity ] + %dummy = load volatile 
i64, ptr null, align 8 + %iv_next = add i64 %iv, 1 + %exit_cond = icmp ult i64 %iv_next, %upper_bound + br i1 %exit_cond, label %loop, label %exit + +exit: + ret i64 0 +} diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll index ff6d9aa..1ba7005 100644 --- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll +++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll @@ -481,7 +481,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(float %x, float %y) { define float @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(float %x, float %y) { ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; @@ -509,7 +509,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(float %x, float %y) { define float @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(float %x, float %y) { ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; @@ -558,7 +558,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_nsz_inverted(f define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x, float %y) { ; CHECK-LABEL: 
@fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; @@ -571,7 +571,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x, float % define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x, float %y) { ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; @@ -585,7 +585,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero(float %x, float nofpclass(nzero) %y) { ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; @@ -598,7 +598,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_ne define float 
@fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub(float %x, float nofpclass(nzero nsub) %y) { ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; @@ -705,7 +705,7 @@ define float @fmul_by_self_if_0_oeq_zero_f32(float %x) { define float @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x) { ; CHECK-LABEL: @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00 +; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan ninf i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00 ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]] ; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]] ; diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll index 253bc9e7..c14dd46 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll @@ -23,6 +23,50 @@ define float @select_fpclass_fadd(i1 %cond, float nofpclass(nan) %A, float %B) { ret float %D } +define float @select_fpclass_fadd_ninf1(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fadd_ninf1( +; CHECK-NEXT: [[C:%.*]] = select ninf i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fadd float [[A:%.*]], [[C]] +; 
CHECK-NEXT: ret float [[D]] +; + %C = fadd ninf float %A, %B + %D = select i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fadd_ninf2(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fadd_ninf2( +; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fadd float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fadd float %A, %B + %D = select ninf i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fadd_ninf3(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fadd_ninf3( +; CHECK-NEXT: [[C:%.*]] = select ninf i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fadd ninf float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fadd ninf float %A, %B + %D = select ninf i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fadd_nnan_ninf(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fadd_nnan_ninf( +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fadd float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fadd float %A, %B + %D = select nnan ninf i1 %cond, float %C, float %A + ret float %D +} + define float @select_nnan_fadd(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fadd( ; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 @@ -47,7 +91,7 @@ define float @select_nnan_fadd_swapped(i1 %cond, float %A, float %B) { define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fadd_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00 ; CHECK-NEXT: [[D:%.*]] = fadd reassoc nnan 
arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -58,7 +102,7 @@ define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) { define float @select_nnan_fadd_swapped_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fadd_swapped_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]] ; CHECK-NEXT: [[D:%.*]] = fadd reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -124,7 +168,7 @@ define float @select_nnan_fmul_swapped(i1 %cond, float %A, float %B) { define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fmul_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[D:%.*]] = fmul reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -135,7 +179,7 @@ define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) { define float @select_nnan_fmul_swapped_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fmul_swapped_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]] ; CHECK-NEXT: [[D:%.*]] = fmul reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -144,6 +188,50 @@ define float @select_nnan_fmul_swapped_fast_math(i1 %cond, float %A, float %B) { ret float %D } +define float @select_fpclass_fmul_ninf1(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fmul_ninf1( +; CHECK-NEXT: [[C:%.*]] = select ninf i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 +; 
CHECK-NEXT: [[D:%.*]] = fmul float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fmul ninf float %A, %B + %D = select i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fmul_ninf2(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fmul_ninf2( +; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fmul float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fmul float %A, %B + %D = select ninf i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fmul_ninf3(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fmul_ninf3( +; CHECK-NEXT: [[C:%.*]] = select ninf i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fmul ninf float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fmul ninf float %A, %B + %D = select ninf i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fmul_nnan_ninf(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fmul_nnan_ninf( +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fmul float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fmul float %A, %B + %D = select nnan ninf i1 %cond, float %C, float %A + ret float %D +} + define float @select_nnan_fsub(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fsub( ; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00 @@ -168,7 +256,7 @@ define float @select_nnan_fsub_swapped(i1 %cond, float %A, float %B) { define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fsub_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00 +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float 
0.000000e+00 ; CHECK-NEXT: [[D:%.*]] = fsub reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -179,7 +267,7 @@ define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) { define float @select_nnan_fsub_swapped_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fsub_swapped_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]] ; CHECK-NEXT: [[D:%.*]] = fsub reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -188,6 +276,50 @@ define float @select_nnan_fsub_swapped_fast_math(i1 %cond, float %A, float %B) { ret float %D } +define float @select_fpclass_fsub_ninf1(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fsub_ninf1( +; CHECK-NEXT: [[C:%.*]] = select ninf i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fsub float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fsub ninf float %A, %B + %D = select i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fsub_ninf2(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fsub_ninf2( +; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fsub float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fsub float %A, %B + %D = select ninf i1 %cond, float %C, float %A + ret float %D +} + +define float @select_fpclass_fsub_ninf3(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fsub_ninf3( +; CHECK-NEXT: [[C:%.*]] = select ninf i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fsub ninf float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fsub ninf float %A, %B + %D = select ninf i1 %cond, float %C, float %A + ret float %D +} 
+ +define float @select_fpclass_fsub_nnan_ninf(i1 %cond, float nofpclass(nan) %A, float %B) { +; CHECK-LABEL: @select_fpclass_fsub_nnan_ninf( +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00 +; CHECK-NEXT: [[D:%.*]] = fsub float [[A:%.*]], [[C]] +; CHECK-NEXT: ret float [[D]] +; + %C = fsub float %A, %B + %D = select nnan ninf i1 %cond, float %C, float %A + ret float %D +} + define <4 x float> @select_nnan_nsz_fsub_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) { ; CHECK-LABEL: @select_nnan_nsz_fsub_v4f32( ; CHECK-NEXT: [[C:%.*]] = select nnan nsz <4 x i1> [[COND:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer @@ -246,7 +378,7 @@ define float @select_nnan_fdiv_swapped(i1 %cond, float %A, float %B) { define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fdiv_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00 ; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; @@ -257,7 +389,7 @@ define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) { define float @select_nnan_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) { ; CHECK-LABEL: @select_nnan_fdiv_swapped_fast_math( -; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select nnan ninf i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]] ; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan arcp contract afn float [[A:%.*]], [[C]] ; CHECK-NEXT: ret float [[D]] ; diff --git a/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll b/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll index df1399d..a8db6a0 100644 --- a/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll +++ b/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll @@ -1,5 +1,5 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes='print<scalar-evolution>,loop-mssa(licm,loop-simplifycfg,loop-predication)' -verify-scev < %s 2>/dev/null | FileCheck %s +; RUN: opt -S -passes='print<scalar-evolution>,loop-mssa(licm,loop-simplifycfg,loop-predication)' -verify-scev < %s | FileCheck %s ; Make sure we don't assert due to insufficient SCEV invalidation. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll index 757d9e7..803ffa8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll @@ -1,42 +1,81 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 ; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s ; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s -; REMARKS: the cost-model indicates that vectorization is not beneficial +target triple = "arm64-apple-macosx" -; Test for https://github.com/llvm/llvm-project/issues/116375. 
-define void @test_i24_load_for(ptr noalias %src, ptr %dst) { -; CHECK-LABEL: define void @test_i24_load_for( -; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]] -; CHECK-NEXT: [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]] -; CHECK-NEXT: store i24 [[FOR]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; REMARKS: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): load +; Test case for https://github.com/llvm/llvm-project/issues/160792. 
+define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noalias %c) #0 { +; CHECK-LABEL: define void @replicate_sdiv_conditional( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = ashr <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 4 x i32> [[TMP9]] to <vscale x 4 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP7]], <vscale x 4 x i64> splat (i64 1) +; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 
4 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = trunc <vscale x 4 x i64> [[TMP12]] to <vscale x 4 x i32> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: store <vscale x 4 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; entry: - br label %loop + br label %loop.header -loop: - %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] - %for = phi i24 [ 0, %entry ], [ %for.next, %loop ] - %iv.next = add i16 %iv, 1 - %gep.src = getelementptr inbounds i24, ptr %src, i16 %iv - %for.next = load i24, ptr %gep.src, align 1 - %gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv - store i24 %for, ptr %gep.dst - %ec = icmp eq i16 %iv.next, 1000 - br i1 %ec, label %exit, label %loop +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + %val.c = load i32, ptr %gep.c, align 4 + %cmp = icmp slt i32 %val.c, 0 + br i1 %cmp, label %if.then, label %loop.latch -exit: +if.then: + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %val.b = load i32, ptr %gep.b, align 4 + %sext = sext i32 %val.b to i64 + %shr = ashr i32 %val.b, 1 + %add = add i32 %shr, %val.c + %conv = sext i32 %add to i64 + %div = sdiv i64 %conv, %sext + %trunc = trunc i64 %div to i32 + br label %loop.latch + +loop.latch: + %result = phi i32 [ %trunc, %if.then ], [ %val.c, %loop.header ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 
%iv + store i32 %result, ptr %gep.a, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exit = icmp eq i64 %iv.next, 64 + br i1 %exit, label %for.end, label %loop.header + +for.end: ret void } + +attributes #0 = { "target-cpu"="neoverse-512tvb" } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll index e154883c..9dbbf4c 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll @@ -45,7 +45,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[TMP9]], <4 x float> splat (float -0.000000e+00) +; CHECK-NEXT: [[TMP10:%.*]] = select ninf <4 x i1> [[TMP20]], <4 x float> [[TMP9]], <4 x float> splat (float -0.000000e+00) ; CHECK-NEXT: [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/invalid-costs.ll new file mode 100644 index 0000000..757d9e7 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/invalid-costs.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s +; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s + +; REMARKS: the cost-model indicates that vectorization is not beneficial + +; Test for https://github.com/llvm/llvm-project/issues/116375. 
+define void @test_i24_load_for(ptr noalias %src, ptr %dst) { +; CHECK-LABEL: define void @test_i24_load_for( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]] +; CHECK-NEXT: [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]] +; CHECK-NEXT: store i24 [[FOR]], ptr [[GEP_DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %for = phi i24 [ 0, %entry ], [ %for.next, %loop ] + %iv.next = add i16 %iv, 1 + %gep.src = getelementptr inbounds i24, ptr %src, i16 %iv + %for.next = load i24, ptr %gep.src, align 1 + %gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv + store i24 %for, ptr %gep.dst + %ec = icmp eq i16 %iv.next, 1000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll index e8709a5..55adda7 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll @@ -41,12 +41,12 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <4 x double> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[TMP6]], [[TMP6]] ; CHECK-NEXT: 
[[TMP11:%.*]] = fmul fast <4 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x double> [[TMP6]], <4 x double> splat (double -0.000000e+00) -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x double> [[TMP7]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP12:%.*]] = select ninf <4 x i1> [[TMP8]], <4 x double> [[TMP6]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP13:%.*]] = select ninf <4 x i1> [[TMP9]], <4 x double> [[TMP7]], <4 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP14]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI16]], [[TMP12]] ; CHECK-NEXT: [[TMP15]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI17]], [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00) -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP9]], <4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP16:%.*]] = select ninf <4 x i1> [[TMP8]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP17:%.*]] = select ninf <4 x i1> [[TMP9]], <4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI15]], [[TMP17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 8 @@ -75,9 +75,9 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef ; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]] ; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00 ; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]] -; CHECK-NEXT: [[ADD8:%.*]] = select i1 [[CMP1]], double [[SUB]], double -0.000000e+00 +; CHECK-NEXT: [[ADD8:%.*]] = select ninf i1 [[CMP1]], double [[SUB]], double -0.000000e+00 ; CHECK-NEXT: [[V0_2]] = 
fadd reassoc arcp contract afn double [[V0_011]], [[ADD8]] -; CHECK-NEXT: [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00 +; CHECK-NEXT: [[ADD4:%.*]] = select ninf i1 [[CMP1]], double [[MUL3]], double -0.000000e+00 ; CHECK-NEXT: [[V1_2]] = fadd reassoc arcp contract afn double [[V1_012]], [[ADD4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] @@ -229,12 +229,12 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt <4 x double> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[TMP10]], [[TMP10]] ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00) -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP16:%.*]] = select ninf <4 x i1> [[TMP12]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP17:%.*]] = select ninf <4 x i1> [[TMP13]], <4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI32]], [[TMP16]] ; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI33]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP14]], <4 x double> splat (double -0.000000e+00) -; CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP15]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP20:%.*]] = select ninf <4 x i1> [[TMP12]], <4 x double> [[TMP14]], <4 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP21:%.*]] = select ninf <4 x i1> [[TMP13]], <4 x double> [[TMP15]], <4 x double> splat 
(double -0.000000e+00) ; CHECK-NEXT: [[TMP22]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI]], [[TMP20]] ; CHECK-NEXT: [[TMP23]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI31]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 8 @@ -263,9 +263,9 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R ; CHECK-NEXT: [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]] ; CHECK-NEXT: [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00 ; CHECK-NEXT: [[ADD7_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]] -; CHECK-NEXT: [[ADD12_US:%.*]] = select i1 [[CMP4_US]], double [[SUB_US]], double -0.000000e+00 +; CHECK-NEXT: [[ADD12_US:%.*]] = select ninf i1 [[CMP4_US]], double [[SUB_US]], double -0.000000e+00 ; CHECK-NEXT: [[V0_2_US]] = fadd reassoc arcp contract afn double [[V0_115_US]], [[ADD12_US]] -; CHECK-NEXT: [[ADD7_US1:%.*]] = select i1 [[CMP4_US]], double [[ADD7_US]], double -0.000000e+00 +; CHECK-NEXT: [[ADD7_US1:%.*]] = select ninf i1 [[CMP4_US]], double [[ADD7_US]], double -0.000000e+00 ; CHECK-NEXT: [[V1_2_US]] = fadd reassoc arcp contract afn double [[V1_116_US]], [[ADD7_US1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND25_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll new file mode 100644 index 0000000..c1042f18 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem \ +; RUN: -passes=slp-vectorizer -S < %s | FileCheck %s +; Function Attrs: nounwind uwtable vscale_range(8,1024) +define i32 @x264_pixel_satd_8x4(ptr %pix1, i32 %i_pix1, ptr %pix2, i32 %i_pix2) { +; CHECK-LABEL: define i32 
@x264_pixel_satd_8x4( +; CHECK-SAME: ptr [[PIX1:%.*]], i32 [[I_PIX1:%.*]], ptr [[PIX2:%.*]], i32 [[I_PIX2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[I_PIX1]] to i64 +; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[I_PIX2]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[PIX1]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[PIX2]], i64 4 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[PIX1]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4 +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_1]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_1]], i64 4 +; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_2]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_2]], i64 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr 
[[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP17]], <16 x i8> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP20:%.*]] = zext <16 x i8> [[TMP19]] to <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; 
CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP29:%.*]] = zext <16 x i8> [[TMP28]] to <16 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i32> [[TMP20]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP31]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: 
[[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP40]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> <i32 0, 
i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP39]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = shl nsw <16 x i32> [[TMP49]], splat (i32 16) +; CHECK-NEXT: [[TMP51:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP30]] +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-NEXT: [[TMP53:%.*]] = add nsw <16 x i32> [[TMP52]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = sub nsw <16 x i32> [[TMP52]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13> +; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 20, i32 21, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 29, i32 14, i32 15> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP61:%.*]] = sub nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> 
[[TMP62]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP63]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP65:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = sub nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP66]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP68:%.*]] = lshr <16 x i32> [[TMP67]], splat (i32 15) +; CHECK-NEXT: [[TMP69:%.*]] = and <16 x i32> [[TMP68]], splat (i32 65537) +; CHECK-NEXT: [[TMP70:%.*]] = mul nuw <16 x i32> [[TMP69]], splat (i32 65535) +; CHECK-NEXT: [[TMP71:%.*]] = add <16 x i32> [[TMP70]], [[TMP67]] +; CHECK-NEXT: [[TMP72:%.*]] = xor <16 x i32> [[TMP71]], [[TMP70]] +; CHECK-NEXT: [[TMP73:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP72]]) +; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP73]], 65535 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP73]], 16 +; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]] +; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 +; CHECK-NEXT: ret i32 [[SHR120]] +; +entry: + %idx.ext = sext i32 %i_pix1 to i64 + %idx.ext63 = sext i32 %i_pix2 to i64 + %0 = load i8, ptr %pix1, align 1 + %conv = zext i8 %0 to i32 + %1 = load i8, ptr %pix2, align 1 + %conv2 = zext i8 %1 to i32 + %sub = sub nsw i32 %conv, %conv2 + %arrayidx3 = getelementptr inbounds nuw i8, ptr %pix1, i64 4 + %2 = load i8, ptr %arrayidx3, align 1 + %conv4 = zext i8 %2 to i32 + %arrayidx5 = getelementptr inbounds nuw i8, ptr %pix2, i64 4 + %3 = load i8, ptr %arrayidx5, align 1 + %conv6 = zext i8 %3 to i32 + %sub7 = sub nsw i32 %conv4, 
%conv6 + %shl = shl nsw i32 %sub7, 16 + %add = add nsw i32 %shl, %sub + %arrayidx8 = getelementptr inbounds nuw i8, ptr %pix1, i64 1 + %4 = load i8, ptr %arrayidx8, align 1 + %conv9 = zext i8 %4 to i32 + %arrayidx10 = getelementptr inbounds nuw i8, ptr %pix2, i64 1 + %5 = load i8, ptr %arrayidx10, align 1 + %conv11 = zext i8 %5 to i32 + %sub12 = sub nsw i32 %conv9, %conv11 + %arrayidx13 = getelementptr inbounds nuw i8, ptr %pix1, i64 5 + %6 = load i8, ptr %arrayidx13, align 1 + %conv14 = zext i8 %6 to i32 + %arrayidx15 = getelementptr inbounds nuw i8, ptr %pix2, i64 5 + %7 = load i8, ptr %arrayidx15, align 1 + %conv16 = zext i8 %7 to i32 + %sub17 = sub nsw i32 %conv14, %conv16 + %shl18 = shl nsw i32 %sub17, 16 + %add19 = add nsw i32 %shl18, %sub12 + %arrayidx20 = getelementptr inbounds nuw i8, ptr %pix1, i64 2 + %8 = load i8, ptr %arrayidx20, align 1 + %conv21 = zext i8 %8 to i32 + %arrayidx22 = getelementptr inbounds nuw i8, ptr %pix2, i64 2 + %9 = load i8, ptr %arrayidx22, align 1 + %conv23 = zext i8 %9 to i32 + %sub24 = sub nsw i32 %conv21, %conv23 + %arrayidx25 = getelementptr inbounds nuw i8, ptr %pix1, i64 6 + %10 = load i8, ptr %arrayidx25, align 1 + %conv26 = zext i8 %10 to i32 + %arrayidx27 = getelementptr inbounds nuw i8, ptr %pix2, i64 6 + %11 = load i8, ptr %arrayidx27, align 1 + %conv28 = zext i8 %11 to i32 + %sub29 = sub nsw i32 %conv26, %conv28 + %shl30 = shl nsw i32 %sub29, 16 + %add31 = add nsw i32 %shl30, %sub24 + %arrayidx32 = getelementptr inbounds nuw i8, ptr %pix1, i64 3 + %12 = load i8, ptr %arrayidx32, align 1 + %conv33 = zext i8 %12 to i32 + %arrayidx34 = getelementptr inbounds nuw i8, ptr %pix2, i64 3 + %13 = load i8, ptr %arrayidx34, align 1 + %conv35 = zext i8 %13 to i32 + %sub36 = sub nsw i32 %conv33, %conv35 + %arrayidx37 = getelementptr inbounds nuw i8, ptr %pix1, i64 7 + %14 = load i8, ptr %arrayidx37, align 1 + %conv38 = zext i8 %14 to i32 + %arrayidx39 = getelementptr inbounds nuw i8, ptr %pix2, i64 7 + %15 = load i8, ptr 
%arrayidx39, align 1 + %conv40 = zext i8 %15 to i32 + %sub41 = sub nsw i32 %conv38, %conv40 + %shl42 = shl nsw i32 %sub41, 16 + %add43 = add nsw i32 %shl42, %sub36 + %add44 = add nsw i32 %add19, %add + %sub45 = sub nsw i32 %add, %add19 + %add46 = add nsw i32 %add43, %add31 + %sub47 = sub nsw i32 %add31, %add43 + %add48 = add nsw i32 %add46, %add44 + %sub51 = sub nsw i32 %add44, %add46 + %add55 = add nsw i32 %sub47, %sub45 + %sub59 = sub nsw i32 %sub45, %sub47 + %add.ptr = getelementptr inbounds i8, ptr %pix1, i64 %idx.ext + %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63 + %16 = load i8, ptr %add.ptr, align 1 + %conv.1 = zext i8 %16 to i32 + %17 = load i8, ptr %add.ptr64, align 1 + %conv2.1 = zext i8 %17 to i32 + %sub.1 = sub nsw i32 %conv.1, %conv2.1 + %arrayidx3.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 4 + %18 = load i8, ptr %arrayidx3.1, align 1 + %conv4.1 = zext i8 %18 to i32 + %arrayidx5.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 4 + %19 = load i8, ptr %arrayidx5.1, align 1 + %conv6.1 = zext i8 %19 to i32 + %sub7.1 = sub nsw i32 %conv4.1, %conv6.1 + %shl.1 = shl nsw i32 %sub7.1, 16 + %add.1 = add nsw i32 %shl.1, %sub.1 + %arrayidx8.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 1 + %20 = load i8, ptr %arrayidx8.1, align 1 + %conv9.1 = zext i8 %20 to i32 + %arrayidx10.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 1 + %21 = load i8, ptr %arrayidx10.1, align 1 + %conv11.1 = zext i8 %21 to i32 + %sub12.1 = sub nsw i32 %conv9.1, %conv11.1 + %arrayidx13.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 5 + %22 = load i8, ptr %arrayidx13.1, align 1 + %conv14.1 = zext i8 %22 to i32 + %arrayidx15.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 5 + %23 = load i8, ptr %arrayidx15.1, align 1 + %conv16.1 = zext i8 %23 to i32 + %sub17.1 = sub nsw i32 %conv14.1, %conv16.1 + %shl18.1 = shl nsw i32 %sub17.1, 16 + %add19.1 = add nsw i32 %shl18.1, %sub12.1 + %arrayidx20.1 = getelementptr inbounds nuw i8, ptr 
%add.ptr, i64 2 + %24 = load i8, ptr %arrayidx20.1, align 1 + %conv21.1 = zext i8 %24 to i32 + %arrayidx22.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 2 + %25 = load i8, ptr %arrayidx22.1, align 1 + %conv23.1 = zext i8 %25 to i32 + %sub24.1 = sub nsw i32 %conv21.1, %conv23.1 + %arrayidx25.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 6 + %26 = load i8, ptr %arrayidx25.1, align 1 + %conv26.1 = zext i8 %26 to i32 + %arrayidx27.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 6 + %27 = load i8, ptr %arrayidx27.1, align 1 + %conv28.1 = zext i8 %27 to i32 + %sub29.1 = sub nsw i32 %conv26.1, %conv28.1 + %shl30.1 = shl nsw i32 %sub29.1, 16 + %add31.1 = add nsw i32 %shl30.1, %sub24.1 + %arrayidx32.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 3 + %28 = load i8, ptr %arrayidx32.1, align 1 + %conv33.1 = zext i8 %28 to i32 + %arrayidx34.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 3 + %29 = load i8, ptr %arrayidx34.1, align 1 + %conv35.1 = zext i8 %29 to i32 + %sub36.1 = sub nsw i32 %conv33.1, %conv35.1 + %arrayidx37.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 7 + %30 = load i8, ptr %arrayidx37.1, align 1 + %conv38.1 = zext i8 %30 to i32 + %arrayidx39.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 7 + %31 = load i8, ptr %arrayidx39.1, align 1 + %conv40.1 = zext i8 %31 to i32 + %sub41.1 = sub nsw i32 %conv38.1, %conv40.1 + %shl42.1 = shl nsw i32 %sub41.1, 16 + %add43.1 = add nsw i32 %shl42.1, %sub36.1 + %add44.1 = add nsw i32 %add19.1, %add.1 + %sub45.1 = sub nsw i32 %add.1, %add19.1 + %add46.1 = add nsw i32 %add43.1, %add31.1 + %sub47.1 = sub nsw i32 %add31.1, %add43.1 + %add48.1 = add nsw i32 %add46.1, %add44.1 + %sub51.1 = sub nsw i32 %add44.1, %add46.1 + %add55.1 = add nsw i32 %sub47.1, %sub45.1 + %sub59.1 = sub nsw i32 %sub45.1, %sub47.1 + %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext + %add.ptr64.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 %idx.ext63 + %32 = load i8, ptr %add.ptr.1, 
align 1 + %conv.2 = zext i8 %32 to i32 + %33 = load i8, ptr %add.ptr64.1, align 1 + %conv2.2 = zext i8 %33 to i32 + %sub.2 = sub nsw i32 %conv.2, %conv2.2 + %arrayidx3.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 4 + %34 = load i8, ptr %arrayidx3.2, align 1 + %conv4.2 = zext i8 %34 to i32 + %arrayidx5.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 4 + %35 = load i8, ptr %arrayidx5.2, align 1 + %conv6.2 = zext i8 %35 to i32 + %sub7.2 = sub nsw i32 %conv4.2, %conv6.2 + %shl.2 = shl nsw i32 %sub7.2, 16 + %add.2 = add nsw i32 %shl.2, %sub.2 + %arrayidx8.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 1 + %36 = load i8, ptr %arrayidx8.2, align 1 + %conv9.2 = zext i8 %36 to i32 + %arrayidx10.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 1 + %37 = load i8, ptr %arrayidx10.2, align 1 + %conv11.2 = zext i8 %37 to i32 + %sub12.2 = sub nsw i32 %conv9.2, %conv11.2 + %arrayidx13.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 5 + %38 = load i8, ptr %arrayidx13.2, align 1 + %conv14.2 = zext i8 %38 to i32 + %arrayidx15.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 5 + %39 = load i8, ptr %arrayidx15.2, align 1 + %conv16.2 = zext i8 %39 to i32 + %sub17.2 = sub nsw i32 %conv14.2, %conv16.2 + %shl18.2 = shl nsw i32 %sub17.2, 16 + %add19.2 = add nsw i32 %shl18.2, %sub12.2 + %arrayidx20.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 2 + %40 = load i8, ptr %arrayidx20.2, align 1 + %conv21.2 = zext i8 %40 to i32 + %arrayidx22.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 2 + %41 = load i8, ptr %arrayidx22.2, align 1 + %conv23.2 = zext i8 %41 to i32 + %sub24.2 = sub nsw i32 %conv21.2, %conv23.2 + %arrayidx25.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 6 + %42 = load i8, ptr %arrayidx25.2, align 1 + %conv26.2 = zext i8 %42 to i32 + %arrayidx27.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 6 + %43 = load i8, ptr %arrayidx27.2, align 1 + %conv28.2 = zext i8 %43 to i32 + %sub29.2 = sub nsw i32 
%conv26.2, %conv28.2 + %shl30.2 = shl nsw i32 %sub29.2, 16 + %add31.2 = add nsw i32 %shl30.2, %sub24.2 + %arrayidx32.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 3 + %44 = load i8, ptr %arrayidx32.2, align 1 + %conv33.2 = zext i8 %44 to i32 + %arrayidx34.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 3 + %45 = load i8, ptr %arrayidx34.2, align 1 + %conv35.2 = zext i8 %45 to i32 + %sub36.2 = sub nsw i32 %conv33.2, %conv35.2 + %arrayidx37.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 7 + %46 = load i8, ptr %arrayidx37.2, align 1 + %conv38.2 = zext i8 %46 to i32 + %arrayidx39.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 7 + %47 = load i8, ptr %arrayidx39.2, align 1 + %conv40.2 = zext i8 %47 to i32 + %sub41.2 = sub nsw i32 %conv38.2, %conv40.2 + %shl42.2 = shl nsw i32 %sub41.2, 16 + %add43.2 = add nsw i32 %shl42.2, %sub36.2 + %add44.2 = add nsw i32 %add19.2, %add.2 + %sub45.2 = sub nsw i32 %add.2, %add19.2 + %add46.2 = add nsw i32 %add43.2, %add31.2 + %sub47.2 = sub nsw i32 %add31.2, %add43.2 + %add48.2 = add nsw i32 %add46.2, %add44.2 + %sub51.2 = sub nsw i32 %add44.2, %add46.2 + %add55.2 = add nsw i32 %sub47.2, %sub45.2 + %sub59.2 = sub nsw i32 %sub45.2, %sub47.2 + %add.ptr.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 %idx.ext + %add.ptr64.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 %idx.ext63 + %48 = load i8, ptr %add.ptr.2, align 1 + %conv.3 = zext i8 %48 to i32 + %49 = load i8, ptr %add.ptr64.2, align 1 + %conv2.3 = zext i8 %49 to i32 + %sub.3 = sub nsw i32 %conv.3, %conv2.3 + %arrayidx3.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 4 + %50 = load i8, ptr %arrayidx3.3, align 1 + %conv4.3 = zext i8 %50 to i32 + %arrayidx5.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 4 + %51 = load i8, ptr %arrayidx5.3, align 1 + %conv6.3 = zext i8 %51 to i32 + %sub7.3 = sub nsw i32 %conv4.3, %conv6.3 + %shl.3 = shl nsw i32 %sub7.3, 16 + %add.3 = add nsw i32 %shl.3, %sub.3 + %arrayidx8.3 = getelementptr 
inbounds nuw i8, ptr %add.ptr.2, i64 1 + %52 = load i8, ptr %arrayidx8.3, align 1 + %conv9.3 = zext i8 %52 to i32 + %arrayidx10.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 1 + %53 = load i8, ptr %arrayidx10.3, align 1 + %conv11.3 = zext i8 %53 to i32 + %sub12.3 = sub nsw i32 %conv9.3, %conv11.3 + %arrayidx13.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 5 + %54 = load i8, ptr %arrayidx13.3, align 1 + %conv14.3 = zext i8 %54 to i32 + %arrayidx15.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 5 + %55 = load i8, ptr %arrayidx15.3, align 1 + %conv16.3 = zext i8 %55 to i32 + %sub17.3 = sub nsw i32 %conv14.3, %conv16.3 + %shl18.3 = shl nsw i32 %sub17.3, 16 + %add19.3 = add nsw i32 %shl18.3, %sub12.3 + %arrayidx20.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 2 + %56 = load i8, ptr %arrayidx20.3, align 1 + %conv21.3 = zext i8 %56 to i32 + %arrayidx22.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 2 + %57 = load i8, ptr %arrayidx22.3, align 1 + %conv23.3 = zext i8 %57 to i32 + %sub24.3 = sub nsw i32 %conv21.3, %conv23.3 + %arrayidx25.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 6 + %58 = load i8, ptr %arrayidx25.3, align 1 + %conv26.3 = zext i8 %58 to i32 + %arrayidx27.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 6 + %59 = load i8, ptr %arrayidx27.3, align 1 + %conv28.3 = zext i8 %59 to i32 + %sub29.3 = sub nsw i32 %conv26.3, %conv28.3 + %shl30.3 = shl nsw i32 %sub29.3, 16 + %add31.3 = add nsw i32 %shl30.3, %sub24.3 + %arrayidx32.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 3 + %60 = load i8, ptr %arrayidx32.3, align 1 + %conv33.3 = zext i8 %60 to i32 + %arrayidx34.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 3 + %61 = load i8, ptr %arrayidx34.3, align 1 + %conv35.3 = zext i8 %61 to i32 + %sub36.3 = sub nsw i32 %conv33.3, %conv35.3 + %arrayidx37.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 7 + %62 = load i8, ptr %arrayidx37.3, align 1 + %conv38.3 = zext i8 %62 to i32 + 
%arrayidx39.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 7 + %63 = load i8, ptr %arrayidx39.3, align 1 + %conv40.3 = zext i8 %63 to i32 + %sub41.3 = sub nsw i32 %conv38.3, %conv40.3 + %shl42.3 = shl nsw i32 %sub41.3, 16 + %add43.3 = add nsw i32 %shl42.3, %sub36.3 + %add44.3 = add nsw i32 %add19.3, %add.3 + %sub45.3 = sub nsw i32 %add.3, %add19.3 + %add46.3 = add nsw i32 %add43.3, %add31.3 + %sub47.3 = sub nsw i32 %add31.3, %add43.3 + %add48.3 = add nsw i32 %add46.3, %add44.3 + %sub51.3 = sub nsw i32 %add44.3, %add46.3 + %add55.3 = add nsw i32 %sub47.3, %sub45.3 + %sub59.3 = sub nsw i32 %sub45.3, %sub47.3 + %add78 = add nsw i32 %add48.1, %add48 + %sub86 = sub nsw i32 %add48, %add48.1 + %add94 = add nsw i32 %add48.3, %add48.2 + %sub102 = sub nsw i32 %add48.2, %add48.3 + %add103 = add nsw i32 %add94, %add78 + %sub104 = sub nsw i32 %add78, %add94 + %add105 = add nsw i32 %sub102, %sub86 + %sub106 = sub nsw i32 %sub86, %sub102 + %shr.i = lshr i32 %add103, 15 + %and.i = and i32 %shr.i, 65537 + %mul.i = mul nuw i32 %and.i, 65535 + %add.i = add i32 %mul.i, %add103 + %xor.i = xor i32 %add.i, %mul.i + %shr.i169 = lshr i32 %add105, 15 + %and.i170 = and i32 %shr.i169, 65537 + %mul.i171 = mul nuw i32 %and.i170, 65535 + %add.i172 = add i32 %mul.i171, %add105 + %xor.i173 = xor i32 %add.i172, %mul.i171 + %shr.i174 = lshr i32 %sub104, 15 + %and.i175 = and i32 %shr.i174, 65537 + %mul.i176 = mul nuw i32 %and.i175, 65535 + %add.i177 = add i32 %mul.i176, %sub104 + %xor.i178 = xor i32 %add.i177, %mul.i176 + %shr.i179 = lshr i32 %sub106, 15 + %and.i180 = and i32 %shr.i179, 65537 + %mul.i181 = mul nuw i32 %and.i180, 65535 + %add.i182 = add i32 %mul.i181, %sub106 + %xor.i183 = xor i32 %add.i182, %mul.i181 + %add110 = add i32 %xor.i173, %xor.i + %add112 = add i32 %add110, %xor.i178 + %add113 = add i32 %add112, %xor.i183 + %add78.1 = add nsw i32 %add55.1, %add55 + %sub86.1 = sub nsw i32 %add55, %add55.1 + %add94.1 = add nsw i32 %add55.3, %add55.2 + %sub102.1 = sub nsw i32 
%add55.2, %add55.3 + %add103.1 = add nsw i32 %add94.1, %add78.1 + %sub104.1 = sub nsw i32 %add78.1, %add94.1 + %add105.1 = add nsw i32 %sub102.1, %sub86.1 + %sub106.1 = sub nsw i32 %sub86.1, %sub102.1 + %shr.i.1 = lshr i32 %add103.1, 15 + %and.i.1 = and i32 %shr.i.1, 65537 + %mul.i.1 = mul nuw i32 %and.i.1, 65535 + %add.i.1 = add i32 %mul.i.1, %add103.1 + %xor.i.1 = xor i32 %add.i.1, %mul.i.1 + %shr.i169.1 = lshr i32 %add105.1, 15 + %and.i170.1 = and i32 %shr.i169.1, 65537 + %mul.i171.1 = mul nuw i32 %and.i170.1, 65535 + %add.i172.1 = add i32 %mul.i171.1, %add105.1 + %xor.i173.1 = xor i32 %add.i172.1, %mul.i171.1 + %shr.i174.1 = lshr i32 %sub104.1, 15 + %and.i175.1 = and i32 %shr.i174.1, 65537 + %mul.i176.1 = mul nuw i32 %and.i175.1, 65535 + %add.i177.1 = add i32 %mul.i176.1, %sub104.1 + %xor.i178.1 = xor i32 %add.i177.1, %mul.i176.1 + %shr.i179.1 = lshr i32 %sub106.1, 15 + %and.i180.1 = and i32 %shr.i179.1, 65537 + %mul.i181.1 = mul nuw i32 %and.i180.1, 65535 + %add.i182.1 = add i32 %mul.i181.1, %sub106.1 + %xor.i183.1 = xor i32 %add.i182.1, %mul.i181.1 + %add108.1 = add i32 %xor.i173.1, %add113 + %add110.1 = add i32 %add108.1, %xor.i.1 + %add112.1 = add i32 %add110.1, %xor.i178.1 + %add113.1 = add i32 %add112.1, %xor.i183.1 + %add78.2 = add nsw i32 %sub51.1, %sub51 + %sub86.2 = sub nsw i32 %sub51, %sub51.1 + %add94.2 = add nsw i32 %sub51.3, %sub51.2 + %sub102.2 = sub nsw i32 %sub51.2, %sub51.3 + %add103.2 = add nsw i32 %add94.2, %add78.2 + %sub104.2 = sub nsw i32 %add78.2, %add94.2 + %add105.2 = add nsw i32 %sub102.2, %sub86.2 + %sub106.2 = sub nsw i32 %sub86.2, %sub102.2 + %shr.i.2 = lshr i32 %add103.2, 15 + %and.i.2 = and i32 %shr.i.2, 65537 + %mul.i.2 = mul nuw i32 %and.i.2, 65535 + %add.i.2 = add i32 %mul.i.2, %add103.2 + %xor.i.2 = xor i32 %add.i.2, %mul.i.2 + %shr.i169.2 = lshr i32 %add105.2, 15 + %and.i170.2 = and i32 %shr.i169.2, 65537 + %mul.i171.2 = mul nuw i32 %and.i170.2, 65535 + %add.i172.2 = add i32 %mul.i171.2, %add105.2 + %xor.i173.2 = xor i32 
%add.i172.2, %mul.i171.2 + %shr.i174.2 = lshr i32 %sub104.2, 15 + %and.i175.2 = and i32 %shr.i174.2, 65537 + %mul.i176.2 = mul nuw i32 %and.i175.2, 65535 + %add.i177.2 = add i32 %mul.i176.2, %sub104.2 + %xor.i178.2 = xor i32 %add.i177.2, %mul.i176.2 + %shr.i179.2 = lshr i32 %sub106.2, 15 + %and.i180.2 = and i32 %shr.i179.2, 65537 + %mul.i181.2 = mul nuw i32 %and.i180.2, 65535 + %add.i182.2 = add i32 %mul.i181.2, %sub106.2 + %xor.i183.2 = xor i32 %add.i182.2, %mul.i181.2 + %add108.2 = add i32 %xor.i173.2, %add113.1 + %add110.2 = add i32 %add108.2, %xor.i.2 + %add112.2 = add i32 %add110.2, %xor.i178.2 + %add113.2 = add i32 %add112.2, %xor.i183.2 + %add78.3 = add nsw i32 %sub59.1, %sub59 + %sub86.3 = sub nsw i32 %sub59, %sub59.1 + %add94.3 = add nsw i32 %sub59.3, %sub59.2 + %sub102.3 = sub nsw i32 %sub59.2, %sub59.3 + %add103.3 = add nsw i32 %add94.3, %add78.3 + %sub104.3 = sub nsw i32 %add78.3, %add94.3 + %add105.3 = add nsw i32 %sub102.3, %sub86.3 + %sub106.3 = sub nsw i32 %sub86.3, %sub102.3 + %shr.i.3 = lshr i32 %add103.3, 15 + %and.i.3 = and i32 %shr.i.3, 65537 + %mul.i.3 = mul nuw i32 %and.i.3, 65535 + %add.i.3 = add i32 %mul.i.3, %add103.3 + %xor.i.3 = xor i32 %add.i.3, %mul.i.3 + %shr.i169.3 = lshr i32 %add105.3, 15 + %and.i170.3 = and i32 %shr.i169.3, 65537 + %mul.i171.3 = mul nuw i32 %and.i170.3, 65535 + %add.i172.3 = add i32 %mul.i171.3, %add105.3 + %xor.i173.3 = xor i32 %add.i172.3, %mul.i171.3 + %shr.i174.3 = lshr i32 %sub104.3, 15 + %and.i175.3 = and i32 %shr.i174.3, 65537 + %mul.i176.3 = mul nuw i32 %and.i175.3, 65535 + %add.i177.3 = add i32 %mul.i176.3, %sub104.3 + %xor.i178.3 = xor i32 %add.i177.3, %mul.i176.3 + %shr.i179.3 = lshr i32 %sub106.3, 15 + %and.i180.3 = and i32 %shr.i179.3, 65537 + %mul.i181.3 = mul nuw i32 %and.i180.3, 65535 + %add.i182.3 = add i32 %mul.i181.3, %sub106.3 + %xor.i183.3 = xor i32 %add.i182.3, %mul.i181.3 + %add108.3 = add i32 %xor.i173.3, %add113.2 + %add110.3 = add i32 %add108.3, %xor.i.3 + %add112.3 = add i32 %add110.3, 
%xor.i178.3 + %add113.3 = add i32 %add112.3, %xor.i183.3 + %conv118 = and i32 %add113.3, 65535 + %shr = lshr i32 %add113.3, 16 + %add119 = add nuw nsw i32 %conv118, %shr + %shr120 = lshr i32 %add119, 1 + ret i32 %shr120 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/poison-within-divisions.ll b/llvm/test/Transforms/SLPVectorizer/X86/poison-within-divisions.ll new file mode 100644 index 0000000..76ef396 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/poison-within-divisions.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(i1 %tobool2.not, i64 %conv21) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i1 [[TOBOOL2_NOT:%.*]], i64 [[CONV21:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[Q24_659:%.*]] = phi i32 [ [[Q24_655:%.*]], %[[IF_END35:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[L15_1:%.*]] = phi i32 [ [[L15_4:%.*]], %[[IF_END35]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[IF_END4:.*]], label %[[Q:.*]] +; CHECK: [[IF_END4]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[Q24_659]], 0 +; CHECK-NEXT: br label %[[AB:.*]] +; CHECK: [[AB]]: +; CHECK-NEXT: [[Q24_658:%.*]] = phi i32 [ [[Q24_660:%.*]], %[[IF_END35]] ], [ 0, %[[IF_END4]] ] +; CHECK-NEXT: [[M_1:%.*]] = phi i1 [ false, %[[IF_END35]] ], [ [[TMP0]], %[[IF_END4]] ] +; CHECK-NEXT: [[O_2:%.*]] = phi i32 [ [[O_7:%.*]], %[[IF_END35]] ], [ 0, %[[IF_END4]] ] +; CHECK-NEXT: [[Q24_2:%.*]] = phi i32 [ [[Q24_7:%.*]], %[[IF_END35]] ], [ 0, %[[IF_END4]] ] +; CHECK-NEXT: br i1 [[M_1]], label %[[AE:.*]], label %[[AC:.*]] +; CHECK: [[Q]]: +; CHECK-NEXT: [[TOBOOL16_NOT:%.*]] = icmp ne i32 [[L15_1]], 0 +; CHECK-NEXT: [[SPEC_SELECT2:%.*]] = zext i1 [[TOBOOL16_NOT]] to i32 +; CHECK-NEXT: br label %[[AE]] +; CHECK: [[AE]]: +; CHECK-NEXT: 
[[Q24_655]] = phi i32 [ [[Q24_658]], %[[AB]] ], [ 0, %[[Q]] ] +; CHECK-NEXT: [[M_3:%.*]] = phi i64 [ 0, %[[AB]] ], [ 1, %[[Q]] ] +; CHECK-NEXT: [[L15_4]] = phi i32 [ poison, %[[AB]] ], [ [[SPEC_SELECT2]], %[[Q]] ] +; CHECK-NEXT: [[O_4:%.*]] = phi i32 [ [[O_2]], %[[AB]] ], [ 0, %[[Q]] ] +; CHECK-NEXT: [[Q24_4:%.*]] = phi i32 [ [[Q24_2]], %[[AB]] ], [ 0, %[[Q]] ] +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[IF_END35]], label %[[IF_THEN20:.*]] +; CHECK: [[IF_THEN20]]: +; CHECK-NEXT: [[DIV22:%.*]] = udiv i64 [[M_3]], [[CONV21]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[DIV22]] to i32 +; CHECK-NEXT: [[CONV23:%.*]] = sub i32 0, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[M_3]] to i32 +; CHECK-NEXT: [[CONV25:%.*]] = xor i32 [[TMP2]], 1 +; CHECK-NEXT: br label %[[IF_END35]] +; CHECK: [[AC]]: +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL2_NOT]], i32 [[Q24_2]], i32 [[O_2]] +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] +; CHECK: [[IF_END35]]: +; CHECK-NEXT: [[Q24_660]] = phi i32 [ 0, %[[AE]] ], [ [[CONV25]], %[[IF_THEN20]] ] +; CHECK-NEXT: [[O_7]] = phi i32 [ [[O_4]], %[[AE]] ], [ [[CONV23]], %[[IF_THEN20]] ] +; CHECK-NEXT: [[Q24_7]] = phi i32 [ [[Q24_4]], %[[AE]] ], [ [[CONV25]], %[[IF_THEN20]] ] +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[WHILE_BODY]], label %[[AB]] +; +entry: + br label %while.body + +while.body: + %q24.659 = phi i32 [ %q24.655, %if.end35 ], [ 0, %entry ] + %l15.1 = phi i32 [ %l15.4, %if.end35 ], [ 0, %entry ] + br i1 %tobool2.not, label %if.end4, label %q + +if.end4: + %0 = icmp eq i32 %q24.659, 0 + br label %ab + +ab: + %q24.658 = phi i32 [ %q24.660, %if.end35 ], [ 0, %if.end4 ] + %m.1 = phi i1 [ false, %if.end35 ], [ %0, %if.end4 ] + %o.2 = phi i32 [ %o.7, %if.end35 ], [ 0, %if.end4 ] + %q24.2 = phi i32 [ %q24.7, %if.end35 ], [ 0, %if.end4 ] + br i1 %m.1, label %ae, label %ac + +q: + %tobool16.not = icmp ne i32 %l15.1, 0 + %spec.select2 = zext i1 %tobool16.not to i32 + br label %ae + +ae: + %q24.655 = phi i32 [ %q24.658, %ab ], [ 0, %q ] 
+ %m.3 = phi i64 [ 0, %ab ], [ 1, %q ] + %l15.4 = phi i32 [ poison, %ab ], [ %spec.select2, %q ] + %o.4 = phi i32 [ %o.2, %ab ], [ 0, %q ] + %q24.4 = phi i32 [ %q24.2, %ab ], [ 0, %q ] + br i1 %tobool2.not, label %if.end35, label %if.then20 + +if.then20: + %div22 = udiv i64 %m.3, %conv21 + %1 = trunc i64 %div22 to i32 + %conv23 = sub i32 0, %1 + %2 = trunc i64 %m.3 to i32 + %conv25 = xor i32 %2, 1 + br label %if.end35 + +ac: + %spec.select = select i1 %tobool2.not, i32 %q24.2, i32 %o.2 + ret i32 %spec.select + +if.end35: + %q24.660 = phi i32 [ 0, %ae ], [ %conv25, %if.then20 ] + %o.7 = phi i32 [ %o.4, %ae ], [ %conv23, %if.then20 ] + %q24.7 = phi i32 [ %q24.4, %ae ], [ %conv25, %if.then20 ] + br i1 %tobool2.not, label %while.body, label %ab +} diff --git a/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll b/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll index c858d07..ead6e02 100644 --- a/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll +++ b/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll @@ -219,4 +219,18 @@ entry: } +define <1 x i32> @test_store_value_size_not_multiple_of_allocated_element_type_size(<1 x i16> %a, <1 x i16> %b) { +entry: + %alloca = alloca [2 x i16] + + %ptr0 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 0 + store <1 x i16> %a, ptr %ptr0 + + %ptr1 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 1 + store <1 x i16> %b, ptr %ptr1 + + %result = load <1 x i32>, ptr %alloca + ret <1 x i32> %result +} + declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp index 56a0fa4..3daacfd 100644 --- a/llvm/tools/bugpoint/OptimizerDriver.cpp +++ b/llvm/tools/bugpoint/OptimizerDriver.cpp @@ -38,11 +38,6 @@ namespace llvm { extern cl::opt<std::string> OutputPrefix; } -static cl::opt<bool> PreserveBitcodeUseListOrder( - 
"preserve-bc-uselistorder", - cl::desc("Preserve use-list order when writing LLVM bitcode."), - cl::init(true), cl::Hidden); - static cl::opt<std::string> OptCmd("opt-command", cl::init(""), cl::desc("Path to opt. (default: search path " @@ -51,7 +46,7 @@ static cl::opt<std::string> /// This writes the current "Program" to the named bitcode file. If an error /// occurs, true is returned. static bool writeProgramToFileAux(ToolOutputFile &Out, const Module &M) { - WriteBitcodeToFile(M, Out.os(), PreserveBitcodeUseListOrder); + WriteBitcodeToFile(M, Out.os(), /* ShouldPreserveUseListOrder */ true); Out.os().close(); if (!Out.os().has_error()) { Out.keep(); @@ -68,7 +63,7 @@ bool BugDriver::writeProgramToFile(const std::string &Filename, int FD, bool BugDriver::writeProgramToFile(int FD, const Module &M) const { raw_fd_ostream OS(FD, /*shouldClose*/ false); - WriteBitcodeToFile(M, OS, PreserveBitcodeUseListOrder); + WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); OS.flush(); if (!OS.has_error()) return false; @@ -155,7 +150,7 @@ bool BugDriver::runPasses(Module &Program, DiscardTemp Discard{*Temp}; raw_fd_ostream OS(Temp->FD, /*shouldClose*/ false); - WriteBitcodeToFile(Program, OS, PreserveBitcodeUseListOrder); + WriteBitcodeToFile(Program, OS, /* ShouldPreserveUseListOrder */ true); OS.flush(); if (OS.has_error()) { errs() << "Error writing bitcode file: " << Temp->TmpName << "\n"; diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index 2164867..200e6a5 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -57,11 +57,6 @@ static cl::opt<bool> cl::desc("Do not run verifier on input LLVM (dangerous!)"), cl::cat(AsCat)); -static cl::opt<bool> PreserveBitcodeUseListOrder( - "preserve-bc-uselistorder", - cl::desc("Preserve use-list order when writing LLVM bitcode."), - cl::init(true), cl::Hidden, cl::cat(AsCat)); - static cl::opt<std::string> ClDataLayout("data-layout", cl::desc("data layout string 
to use"), cl::value_desc("layout-string"), @@ -100,7 +95,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) { // any non-null Index along with it as a per-module Index. // If both are empty, this will give an empty module block, which is // the expected behavior. - WriteBitcodeToFile(*M, Out->os(), PreserveBitcodeUseListOrder, + WriteBitcodeToFile(*M, Out->os(), /* ShouldPreserveUseListOrder */ true, IndexToWrite, EmitModuleHash); else // Otherwise, with an empty Module but non-empty Index, we write a diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index 2b43d27..35c5409 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -80,11 +80,6 @@ static cl::opt<bool> cl::desc("Add informational comments to the .ll file"), cl::cat(DisCategory)); -static cl::opt<bool> PreserveAssemblyUseListOrder( - "preserve-ll-uselistorder", - cl::desc("Preserve use-list order when writing LLVM assembly."), - cl::init(false), cl::Hidden, cl::cat(DisCategory)); - static cl::opt<bool> MaterializeMetadata("materialize-metadata", cl::desc("Load module without materializing metadata, " @@ -255,7 +250,8 @@ int main(int argc, char **argv) { if (!DontPrint) { if (M) { M->removeDebugIntrinsicDeclarations(); - M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder); + M->print(Out->os(), Annotator.get(), + /* ShouldPreserveUseListOrder */ false); } if (Index) Index->print(Out->os()); diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 69636ca..439a4a4 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -129,16 +129,6 @@ static cl::opt<bool> OutputAssembly("S", cl::desc("Write output as LLVM assembly"), cl::Hidden, cl::cat(ExtractCat)); -static cl::opt<bool> PreserveBitcodeUseListOrder( - "preserve-bc-uselistorder", - cl::desc("Preserve use-list order when writing LLVM bitcode."), - 
cl::init(true), cl::Hidden, cl::cat(ExtractCat)); - -static cl::opt<bool> PreserveAssemblyUseListOrder( - "preserve-ll-uselistorder", - cl::desc("Preserve use-list order when writing LLVM assembly."), - cl::init(false), cl::Hidden, cl::cat(ExtractCat)); - int main(int argc, char **argv) { InitLLVM X(argc, argv); @@ -421,9 +411,11 @@ int main(int argc, char **argv) { } if (OutputAssembly) - PM.addPass(PrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); + PM.addPass( + PrintModulePass(Out.os(), "", /* ShouldPreserveUseListOrder */ false)); else if (Force || !CheckBitcodeOutputToConsole(Out.os())) - PM.addPass(BitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); + PM.addPass( + BitcodeWriterPass(Out.os(), /* ShouldPreserveUseListOrder */ true)); PM.run(*M, MAM); diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 22ea54e..93b1fb6 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -110,16 +110,6 @@ static cl::opt<bool> SuppressWarnings("suppress-warnings", cl::desc("Suppress all linking warnings"), cl::init(false), cl::cat(LinkCategory)); -static cl::opt<bool> PreserveBitcodeUseListOrder( - "preserve-bc-uselistorder", - cl::desc("Preserve use-list order when writing LLVM bitcode."), - cl::init(true), cl::Hidden, cl::cat(LinkCategory)); - -static cl::opt<bool> PreserveAssemblyUseListOrder( - "preserve-ll-uselistorder", - cl::desc("Preserve use-list order when writing LLVM assembly."), - cl::init(false), cl::Hidden, cl::cat(LinkCategory)); - static cl::opt<bool> NoVerify("disable-verify", cl::desc("Do not run the verifier"), cl::Hidden, cl::cat(LinkCategory)); @@ -525,9 +515,10 @@ int main(int argc, char **argv) { errs() << "Writing bitcode...\n"; Composite->removeDebugIntrinsicDeclarations(); if (OutputAssembly) { - Composite->print(Out.os(), nullptr, PreserveAssemblyUseListOrder); + Composite->print(Out.os(), nullptr, /* ShouldPreserveUseListOrder */ false); } else if (Force 
|| !CheckBitcodeOutputToConsole(Out.os())) { - WriteBitcodeToFile(*Composite, Out.os(), PreserveBitcodeUseListOrder); + WriteBitcodeToFile(*Composite, Out.os(), + /* ShouldPreserveUseListOrder */ true); } // Declare success. diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index d4fa6eb..2ac8de7 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -232,16 +232,6 @@ static cl::opt<std::string> ClDataLayout("data-layout", cl::value_desc("layout-string"), cl::init("")); -static cl::opt<bool> PreserveBitcodeUseListOrder( - "preserve-bc-uselistorder", - cl::desc("Preserve use-list order when writing LLVM bitcode."), - cl::init(true), cl::Hidden); - -static cl::opt<bool> PreserveAssemblyUseListOrder( - "preserve-ll-uselistorder", - cl::desc("Preserve use-list order when writing LLVM assembly."), - cl::init(false), cl::Hidden); - static cl::opt<bool> RunTwice("run-twice", cl::desc("Run all passes twice, re-using the " "same pass manager (legacy PM only)."), @@ -753,9 +743,9 @@ extern "C" int optMain( return runPassPipeline( argv[0], *M, TM.get(), &TLII, Out.get(), ThinLinkOut.get(), RemarksFile.get(), Pipeline, PluginList, PassBuilderCallbacks, - OK, VK, PreserveAssemblyUseListOrder, - PreserveBitcodeUseListOrder, EmitSummaryIndex, EmitModuleHash, - EnableDebugify, VerifyDebugInfoPreserve, + OK, VK, /* ShouldPreserveAssemblyUseListOrder */ false, + /* ShouldPreserveBitcodeUseListOrder */ true, EmitSummaryIndex, + EmitModuleHash, EnableDebugify, VerifyDebugInfoPreserve, EnableProfileVerification, UnifiedLTO) ? 
0 : 1; @@ -877,9 +867,11 @@ extern "C" int optMain( OS = BOS.get(); } if (OutputAssembly) - Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder)); + Passes.add(createPrintModulePass( + *OS, "", /* ShouldPreserveAssemblyUseListOrder */ false)); else - Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder)); + Passes.add(createBitcodeWriterPass( + *OS, /* ShouldPreserveBitcodeUseListOrder */ true)); } // Before executing passes, print the final values of the LLVM options. diff --git a/llvm/unittests/ExecutionEngine/Orc/SymbolStringPoolTest.cpp b/llvm/unittests/ExecutionEngine/Orc/SymbolStringPoolTest.cpp index cd1cecd..698dda1 100644 --- a/llvm/unittests/ExecutionEngine/Orc/SymbolStringPoolTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/SymbolStringPoolTest.cpp @@ -180,4 +180,14 @@ TEST_F(SymbolStringPoolTest, SymbolStringPoolEntryUnsafe) { EXPECT_EQ(getRefCount(A), 1U); } +TEST_F(SymbolStringPoolTest, Hashing) { + auto A = SP.intern("a"); + auto B = NonOwningSymbolStringPtr(A); + + hash_code AHash = hash_value(A); + hash_code BHash = hash_value(B); + + EXPECT_EQ(AHash, BHash); +} + } // namespace diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index 5bc516d..58a65b9 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/ConstantFPRange.h" +#include "llvm/ADT/APFloat.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "gtest/gtest.h" @@ -818,4 +819,110 @@ TEST_F(ConstantFPRangeTest, getWithout) { APFloat::getLargest(Sem, /*Negative=*/true), APFloat(3.0))); } +TEST_F(ConstantFPRangeTest, cast) { + const fltSemantics &F16Sem = APFloat::IEEEhalf(); + const fltSemantics &BF16Sem = APFloat::BFloat(); + const fltSemantics &F32Sem = APFloat::IEEEsingle(); + const fltSemantics &F8NanOnlySem = 
APFloat::Float8E4M3FN(); + // normal -> normal (exact) + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0)).cast(F32Sem), + ConstantFPRange::getNonNaN(APFloat(1.0f), APFloat(2.0f))); + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat(-2.0f), APFloat(-1.0f)).cast(Sem), + ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(-1.0))); + // normal -> normal (inexact) + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat(3.141592653589793), + APFloat(6.283185307179586)) + .cast(F32Sem), + ConstantFPRange::getNonNaN(APFloat(3.14159274f), APFloat(6.28318548f))); + // normal -> subnormal + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-5e-8), APFloat(5e-8)) + .cast(F16Sem) + .classify(), + fcSubnormal | fcZero); + // normal -> zero + EXPECT_EQ(ConstantFPRange::getNonNaN( + APFloat::getSmallestNormalized(Sem, /*Negative=*/true), + APFloat::getSmallestNormalized(Sem, /*Negative=*/false)) + .cast(F32Sem) + .classify(), + fcZero); + // normal -> inf + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-65536.0), APFloat(65536.0)) + .cast(F16Sem), + ConstantFPRange::getNonNaN(F16Sem)); + // nan -> qnan + EXPECT_EQ( + ConstantFPRange::getNaNOnly(Sem, /*MayBeQNaN=*/true, /*MayBeSNaN=*/false) + .cast(F32Sem), + ConstantFPRange::getNaNOnly(F32Sem, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/false)); + EXPECT_EQ( + ConstantFPRange::getNaNOnly(Sem, /*MayBeQNaN=*/false, /*MayBeSNaN=*/true) + .cast(F32Sem), + ConstantFPRange::getNaNOnly(F32Sem, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/false)); + EXPECT_EQ( + ConstantFPRange::getNaNOnly(Sem, /*MayBeQNaN=*/true, /*MayBeSNaN=*/true) + .cast(F32Sem), + ConstantFPRange::getNaNOnly(F32Sem, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/false)); + // For BF16 -> F32, signaling bit is still lost. 
+ EXPECT_EQ(ConstantFPRange::getNaNOnly(BF16Sem, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/true) + .cast(F32Sem), + ConstantFPRange::getNaNOnly(F32Sem, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/false)); + // inf -> nan only (return full set for now) + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), + APFloat::getInf(Sem, /*Negative=*/false)) + .cast(F8NanOnlySem), + ConstantFPRange::getFull(F8NanOnlySem)); + // other rounding modes + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat::getSmallest(Sem, /*Negative=*/true), + APFloat::getSmallest(Sem, /*Negative=*/false)) + .cast(F32Sem, APFloat::rmTowardNegative), + ConstantFPRange::getNonNaN( + APFloat::getSmallest(F32Sem, /*Negative=*/true), + APFloat::getZero(F32Sem, /*Negative=*/false))); + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat::getSmallest(Sem, /*Negative=*/true), + APFloat::getSmallest(Sem, /*Negative=*/false)) + .cast(F32Sem, APFloat::rmTowardPositive), + ConstantFPRange::getNonNaN( + APFloat::getZero(F32Sem, /*Negative=*/true), + APFloat::getSmallest(F32Sem, /*Negative=*/false))); + EXPECT_EQ( + ConstantFPRange::getNonNaN( + APFloat::getSmallestNormalized(Sem, /*Negative=*/true), + APFloat::getSmallestNormalized(Sem, /*Negative=*/false)) + .cast(F32Sem, APFloat::rmTowardZero), + ConstantFPRange::getNonNaN(APFloat::getZero(F32Sem, /*Negative=*/true), + APFloat::getZero(F32Sem, /*Negative=*/false))); + + EnumerateValuesInConstantFPRange( + ConstantFPRange::getFull(APFloat::Float8E4M3()), + [&](const APFloat &V) { + bool LosesInfo = false; + + APFloat DoubleV = V; + DoubleV.convert(Sem, APFloat::rmNearestTiesToEven, &LosesInfo); + ConstantFPRange DoubleCR = ConstantFPRange(V).cast(Sem); + EXPECT_TRUE(DoubleCR.contains(DoubleV)) + << "Casting " << V << " to double failed. 
" << DoubleCR + << " doesn't contain " << DoubleV; + + auto &FP4Sem = APFloat::Float4E2M1FN(); + APFloat FP4V = V; + FP4V.convert(FP4Sem, APFloat::rmNearestTiesToEven, &LosesInfo); + ConstantFPRange FP4CR = ConstantFPRange(V).cast(FP4Sem); + EXPECT_TRUE(FP4CR.contains(FP4V)) + << "Casting " << V << " to FP4E2M1FN failed. " << FP4CR + << " doesn't contain " << FP4V; + }, + /*IgnoreNaNPayload=*/true); +} + } // anonymous namespace diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index fbeff37..8f9ed41 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -626,6 +626,11 @@ TEST(raw_ostreamTest, writeToDevNull) { EXPECT_TRUE(DevNullIsUsed); } +TEST(raw_ostreamTest, nullStreamZeroBufferSize) { + raw_ostream &NullStream = nulls(); + EXPECT_EQ(NullStream.GetBufferSize(), 0u); +} + TEST(raw_ostreamTest, writeToStdOut) { outs().flush(); testing::internal::CaptureStdout(); diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index f101624..bdcb8a3 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -11,7 +11,6 @@ CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll CodeGen/AArch64/selectopt-cast.ll CodeGen/AArch64/selectopt.ll CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll -CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll @@ -74,7 +73,6 @@ CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll CodeGen/Hexagon/loop-idiom/memmove-rt-check.ll CodeGen/NVPTX/lower-ctor-dtor.ll CodeGen/RISCV/zmmul.ll -CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll CodeGen/WebAssembly/memory-interleave.ll CodeGen/X86/masked_gather_scatter.ll CodeGen/X86/nocfivalue.ll @@ -85,7 +83,6 @@ DebugInfo/KeyInstructions/Generic/loop-unswitch.ll DebugInfo/X86/asan_debug_info.ll 
Instrumentation/AddressSanitizer/aarch64be.ll Instrumentation/AddressSanitizer/adaptive_global_redzones.ll -Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll @@ -549,12 +546,6 @@ tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test tools/UpdateTestChecks/update_test_checks/stable_ir_values.test tools/UpdateTestChecks/update_test_checks/tbaa-semantics-checks.test tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test -Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll -Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll -Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll -Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll -Transforms/AggressiveInstCombine/trunc_select_cmp.ll -Transforms/AggressiveInstCombine/trunc_select.ll Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll Transforms/AtomicExpand/AArch64/pcsections.ll @@ -819,7 +810,6 @@ Transforms/InstCombine/AMDGPU/addrspacecast.ll Transforms/InstCombine/and2.ll Transforms/InstCombine/and-fcmp.ll Transforms/InstCombine/and.ll -Transforms/InstCombine/and-or-icmp-nullptr.ll Transforms/InstCombine/and-or-icmps.ll Transforms/InstCombine/and-or-implied-cond-not.ll Transforms/InstCombine/apint-div1.ll @@ -1260,7 +1250,6 @@ Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll Transforms/PhaseOrdering/AArch64/predicated-reduction.ll Transforms/PhaseOrdering/AArch64/quant_4x4.ll Transforms/PhaseOrdering/ARM/arm_mean_q7.ll -Transforms/PhaseOrdering/lower-table-based-cttz.ll Transforms/PhaseOrdering/vector-select.ll Transforms/PhaseOrdering/X86/blendv-select.ll 
Transforms/PhaseOrdering/X86/merge-functions2.ll diff --git a/mlir/docs/Bindings/Python.md b/mlir/docs/Bindings/Python.md index 98ac635..893c6d4 100644 --- a/mlir/docs/Bindings/Python.md +++ b/mlir/docs/Bindings/Python.md @@ -1188,6 +1188,26 @@ which can be `import`ed from the main dialect file, i.e. `python/mlir/dialects/<dialect-namespace>/passes.py` if it is undesirable to make the passes available along with the dialect. +Passes can be defined as Python callables via the `PassManager.add` API. +In such case, the callable is wrapped as an `mlir::Pass` internally and +executed as part of the pass pipeline when `PassManager.run` is invoked. +In the callable, the `op` parameter represents the current operation being transformed, +while the `pass_` parameter provides access to the current `Pass` object, +allowing actions such as `signalPassFailure()`. +The lifetime of the callable is extended at least until the `PassManager` is destroyed. +The following example code demonstrates how to define Python passes. + +```python +def demo_pass(op, pass_): + # do something with the given op + pass + +pm = PassManager('any') +pm.add(demo_pass) +pm.add('some-cpp-defined-passes') +... +pm.run(some_op) +``` ### Other functionality diff --git a/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp index 39ae6a0..a9592bc 100644 --- a/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp @@ -264,8 +264,7 @@ private: // The attribute is a vector with a floating point value per element // (number) in the array, see `collectData()` below for more details. 
std::vector<double> data; - data.reserve(std::accumulate(lit.getDims().begin(), lit.getDims().end(), 1, - std::multiplies<int>())); + data.reserve(llvm::product_of(lit.getDims())); collectData(lit, data); // The type of this attribute is tensor of 64-bit floating-point with the diff --git a/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp index 0573af6..8c21951 100644 --- a/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp @@ -264,8 +264,7 @@ private: // The attribute is a vector with a floating point value per element // (number) in the array, see `collectData()` below for more details. std::vector<double> data; - data.reserve(std::accumulate(lit.getDims().begin(), lit.getDims().end(), 1, - std::multiplies<int>())); + data.reserve(llvm::product_of(lit.getDims())); collectData(lit, data); // The type of this attribute is tensor of 64-bit floating-point with the diff --git a/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp index 7d676f1..6b7ab40 100644 --- a/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp @@ -268,8 +268,7 @@ private: // The attribute is a vector with a floating point value per element // (number) in the array, see `collectData()` below for more details. std::vector<double> data; - data.reserve(std::accumulate(lit.getDims().begin(), lit.getDims().end(), 1, - std::multiplies<int>())); + data.reserve(llvm::product_of(lit.getDims())); collectData(lit, data); // The type of this attribute is tensor of 64-bit floating-point with the diff --git a/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp index 7d676f1..6b7ab40 100644 --- a/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp @@ -268,8 +268,7 @@ private: // The attribute is a vector with a floating point value per element // (number) in the array, see `collectData()` below for more details. 
std::vector<double> data; - data.reserve(std::accumulate(lit.getDims().begin(), lit.getDims().end(), 1, - std::multiplies<int>())); + data.reserve(llvm::product_of(lit.getDims())); collectData(lit, data); // The type of this attribute is tensor of 64-bit floating-point with the diff --git a/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp index 7d676f1..6b7ab40 100644 --- a/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp @@ -268,8 +268,7 @@ private: // The attribute is a vector with a floating point value per element // (number) in the array, see `collectData()` below for more details. std::vector<double> data; - data.reserve(std::accumulate(lit.getDims().begin(), lit.getDims().end(), 1, - std::multiplies<int>())); + data.reserve(llvm::product_of(lit.getDims())); collectData(lit, data); // The type of this attribute is tensor of 64-bit floating-point with the diff --git a/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp index 75dbc91..7313324 100644 --- a/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp @@ -405,8 +405,7 @@ private: // The attribute is a vector with a floating point value per element // (number) in the array, see `collectData()` below for more details. 
std::vector<double> data; - data.reserve(std::accumulate(lit.getDims().begin(), lit.getDims().end(), 1, - std::multiplies<int>())); + data.reserve(llvm::product_of(lit.getDims())); collectData(lit, data); // The type of this attribute is tensor of 64-bit floating-point with the diff --git a/mlir/include/mlir-c/Rewrite.h b/mlir/include/mlir-c/Rewrite.h index 5dd285e..2db1d84 100644 --- a/mlir/include/mlir-c/Rewrite.h +++ b/mlir/include/mlir-c/Rewrite.h @@ -38,6 +38,7 @@ DEFINE_C_API_STRUCT(MlirFrozenRewritePatternSet, void); DEFINE_C_API_STRUCT(MlirGreedyRewriteDriverConfig, void); DEFINE_C_API_STRUCT(MlirRewritePatternSet, void); DEFINE_C_API_STRUCT(MlirPatternRewriter, void); +DEFINE_C_API_STRUCT(MlirRewritePattern, const void); //===----------------------------------------------------------------------===// /// RewriterBase API inherited from OpBuilder @@ -302,11 +303,15 @@ MLIR_CAPI_EXPORTED void mlirIRRewriterDestroy(MlirRewriterBase rewriter); /// FrozenRewritePatternSet API //===----------------------------------------------------------------------===// +/// Freeze the given MlirRewritePatternSet to a MlirFrozenRewritePatternSet. +/// Note that the ownership of the input set is transferred into the frozen set +/// after this call. MLIR_CAPI_EXPORTED MlirFrozenRewritePatternSet -mlirFreezeRewritePattern(MlirRewritePatternSet op); +mlirFreezeRewritePattern(MlirRewritePatternSet set); +/// Destroy the given MlirFrozenRewritePatternSet. 
MLIR_CAPI_EXPORTED void -mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet op); +mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet set); MLIR_CAPI_EXPORTED MlirLogicalResult mlirApplyPatternsAndFoldGreedilyWithOp( MlirOperation op, MlirFrozenRewritePatternSet patterns, @@ -325,6 +330,51 @@ MLIR_CAPI_EXPORTED MlirRewriterBase mlirPatternRewriterAsBase(MlirPatternRewriter rewriter); //===----------------------------------------------------------------------===// +/// RewritePattern API +//===----------------------------------------------------------------------===// + +/// Callbacks to construct a rewrite pattern. +typedef struct { + /// Optional constructor for the user data. + /// Set to nullptr to disable it. + void (*construct)(void *userData); + /// Optional destructor for the user data. + /// Set to nullptr to disable it. + void (*destruct)(void *userData); + /// The callback function to match against code rooted at the specified + /// operation, and perform the rewrite if the match is successful, + /// corresponding to RewritePattern::matchAndRewrite. + MlirLogicalResult (*matchAndRewrite)(MlirRewritePattern pattern, + MlirOperation op, + MlirPatternRewriter rewriter, + void *userData); +} MlirRewritePatternCallbacks; + +/// Create a rewrite pattern that matches the operation +/// with the given rootName, corresponding to mlir::OpRewritePattern. +MLIR_CAPI_EXPORTED MlirRewritePattern mlirOpRewritePattenCreate( + MlirStringRef rootName, unsigned benefit, MlirContext context, + MlirRewritePatternCallbacks callbacks, void *userData, + size_t nGeneratedNames, MlirStringRef *generatedNames); + +//===----------------------------------------------------------------------===// +/// RewritePatternSet API +//===----------------------------------------------------------------------===// + +/// Create an empty MlirRewritePatternSet. 
+MLIR_CAPI_EXPORTED MlirRewritePatternSet +mlirRewritePatternSetCreate(MlirContext context); + +/// Destruct the given MlirRewritePatternSet. +MLIR_CAPI_EXPORTED void mlirRewritePatternSetDestroy(MlirRewritePatternSet set); + +/// Add the given MlirRewritePattern into a MlirRewritePatternSet. +/// Note that the ownership of the pattern is transferred to the set after this +/// call. +MLIR_CAPI_EXPORTED void mlirRewritePatternSetAdd(MlirRewritePatternSet set, + MlirRewritePattern pattern); + +//===----------------------------------------------------------------------===// /// PDLPatternModule API //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/CAPI/Rewrite.h b/mlir/include/mlir/CAPI/Rewrite.h index 1038c0a..9c96d35 100644 --- a/mlir/include/mlir/CAPI/Rewrite.h +++ b/mlir/include/mlir/CAPI/Rewrite.h @@ -18,7 +18,19 @@ #include "mlir-c/Rewrite.h" #include "mlir/CAPI/Wrap.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Rewrite/FrozenRewritePatternSet.h" DEFINE_C_API_PTR_METHODS(MlirRewriterBase, mlir::RewriterBase) +DEFINE_C_API_PTR_METHODS(MlirRewritePattern, const mlir::RewritePattern) +DEFINE_C_API_PTR_METHODS(MlirRewritePatternSet, mlir::RewritePatternSet) +DEFINE_C_API_PTR_METHODS(MlirFrozenRewritePatternSet, + mlir::FrozenRewritePatternSet) +DEFINE_C_API_PTR_METHODS(MlirPatternRewriter, mlir::PatternRewriter) + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH +DEFINE_C_API_PTR_METHODS(MlirPDLPatternModule, mlir::PDLPatternModule) +DEFINE_C_API_PTR_METHODS(MlirPDLResultList, mlir::PDLResultList) +DEFINE_C_API_PTR_METHODS(MlirPDLValue, const mlir::PDLValue) +#endif // MLIR_ENABLE_PDL_IN_PATTERNMATCH #endif // MLIR_CAPIREWRITER_H diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index e2a0331..89fbeb7 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -3233,35 +3233,15 @@ def 
NVVM_CpAsyncBulkGlobalToSharedClusterOp : attr-dict `:` type($dstMem) `,` type($srcMem) }]; + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); + }]; string llvmBuilder = [{ - // Arguments to the intrinsic: - // dst, mbar, src, size - // multicast_mask, cache_hint, - // flag for multicast_mask, - // flag for cache_hint - llvm::SmallVector<llvm::Value *> translatedOperands; - translatedOperands.push_back($dstMem); - translatedOperands.push_back($mbar); - translatedOperands.push_back($srcMem); - translatedOperands.push_back($size); - - // Multicast, if available - llvm::LLVMContext &ctx = moduleTranslation.getLLVMContext(); - auto *i16Unused = llvm::ConstantInt::get(llvm::Type::getInt16Ty(ctx), 0); - bool isMulticast = op.getMulticastMask() ? true : false; - translatedOperands.push_back(isMulticast ? $multicastMask : i16Unused); - - // Cachehint, if available - auto *i64Unused = llvm::ConstantInt::get(llvm::Type::getInt64Ty(ctx), 0); - bool isCacheHint = op.getL2CacheHint() ? true : false; - translatedOperands.push_back(isCacheHint ? 
$l2CacheHint : i64Unused); - - // Flag arguments for multicast and cachehint - translatedOperands.push_back(builder.getInt1(isMulticast)); - translatedOperands.push_back(builder.getInt1(isCacheHint)); - - createIntrinsicCall(builder, - llvm::Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster, translatedOperands); + auto [id, args] = NVVM::CpAsyncBulkGlobalToSharedClusterOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + createIntrinsicCall(builder, id, args); }]; } diff --git a/mlir/lib/Bindings/Python/Rewrite.cpp b/mlir/lib/Bindings/Python/Rewrite.cpp index 9e3d970..47685567 100644 --- a/mlir/lib/Bindings/Python/Rewrite.cpp +++ b/mlir/lib/Bindings/Python/Rewrite.cpp @@ -45,6 +45,16 @@ public: return PyInsertionPoint(PyOperation::forOperation(ctx, op)); } + void replaceOp(MlirOperation op, MlirOperation newOp) { + mlirRewriterBaseReplaceOpWithOperation(base, op, newOp); + } + + void replaceOp(MlirOperation op, const std::vector<MlirValue> &values) { + mlirRewriterBaseReplaceOpWithValues(base, op, values.size(), values.data()); + } + + void eraseOp(MlirOperation op) { mlirRewriterBaseEraseOp(base, op); } + private: MlirRewriterBase base; PyMlirContextRef ctx; @@ -165,13 +175,115 @@ private: MlirFrozenRewritePatternSet set; }; +class PyRewritePatternSet { +public: + PyRewritePatternSet(MlirContext ctx) + : set(mlirRewritePatternSetCreate(ctx)), ctx(ctx) {} + ~PyRewritePatternSet() { + if (set.ptr) + mlirRewritePatternSetDestroy(set); + } + + void add(MlirStringRef rootName, unsigned benefit, + const nb::callable &matchAndRewrite) { + MlirRewritePatternCallbacks callbacks; + callbacks.construct = [](void *userData) { + nb::handle(static_cast<PyObject *>(userData)).inc_ref(); + }; + callbacks.destruct = [](void *userData) { + nb::handle(static_cast<PyObject *>(userData)).dec_ref(); + }; + callbacks.matchAndRewrite = [](MlirRewritePattern, MlirOperation op, + MlirPatternRewriter rewriter, + void *userData) -> MlirLogicalResult { + nb::handle 
f(static_cast<PyObject *>(userData)); + nb::object res = f(op, PyPatternRewriter(rewriter)); + return logicalResultFromObject(res); + }; + MlirRewritePattern pattern = mlirOpRewritePattenCreate( + rootName, benefit, ctx, callbacks, matchAndRewrite.ptr(), + /* nGeneratedNames */ 0, + /* generatedNames */ nullptr); + mlirRewritePatternSetAdd(set, pattern); + } + + PyFrozenRewritePatternSet freeze() { + MlirRewritePatternSet s = set; + set.ptr = nullptr; + return mlirFreezeRewritePattern(s); + } + +private: + MlirRewritePatternSet set; + MlirContext ctx; +}; + } // namespace /// Create the `mlir.rewrite` here. void mlir::python::populateRewriteSubmodule(nb::module_ &m) { - nb::class_<PyPatternRewriter>(m, "PatternRewriter") - .def_prop_ro("ip", &PyPatternRewriter::getInsertionPoint, - "The current insertion point of the PatternRewriter."); + //---------------------------------------------------------------------------- + // Mapping of the PatternRewriter + //---------------------------------------------------------------------------- + nb:: + class_<PyPatternRewriter>(m, "PatternRewriter") + .def_prop_ro("ip", &PyPatternRewriter::getInsertionPoint, + "The current insertion point of the PatternRewriter.") + .def( + "replace_op", + [](PyPatternRewriter &self, MlirOperation op, + MlirOperation newOp) { self.replaceOp(op, newOp); }, + "Replace an operation with a new operation.", nb::arg("op"), + nb::arg("new_op"), + // clang-format off + nb::sig("def replace_op(self, op: " MAKE_MLIR_PYTHON_QUALNAME("ir.Operation") ", new_op: " MAKE_MLIR_PYTHON_QUALNAME("ir.Operation") ") -> None") + // clang-format on + ) + .def( + "replace_op", + [](PyPatternRewriter &self, MlirOperation op, + const std::vector<MlirValue> &values) { + self.replaceOp(op, values); + }, + "Replace an operation with a list of values.", nb::arg("op"), + nb::arg("values"), + // clang-format off + nb::sig("def replace_op(self, op: " MAKE_MLIR_PYTHON_QUALNAME("ir.Operation") ", values: list[" 
MAKE_MLIR_PYTHON_QUALNAME("ir.Value") "]) -> None") + // clang-format on + ) + .def("erase_op", &PyPatternRewriter::eraseOp, "Erase an operation.", + nb::arg("op"), + // clang-format off + nb::sig("def erase_op(self, op: " MAKE_MLIR_PYTHON_QUALNAME("ir.Operation") ") -> None") + // clang-format on + ); + + //---------------------------------------------------------------------------- + // Mapping of the RewritePatternSet + //---------------------------------------------------------------------------- + nb::class_<PyRewritePatternSet>(m, "RewritePatternSet") + .def( + "__init__", + [](PyRewritePatternSet &self, DefaultingPyMlirContext context) { + new (&self) PyRewritePatternSet(context.get()->get()); + }, + "context"_a = nb::none()) + .def( + "add", + [](PyRewritePatternSet &self, nb::handle root, const nb::callable &fn, + unsigned benefit) { + std::string opName = + nb::cast<std::string>(root.attr("OPERATION_NAME")); + self.add(mlirStringRefCreate(opName.data(), opName.size()), benefit, + fn); + }, + "root"_a, "fn"_a, "benefit"_a = 1, + "Add a new rewrite pattern on the given root operation with the " + "callable as the matching and rewriting function and the given " + "benefit.") + .def("freeze", &PyRewritePatternSet::freeze, + "Freeze the pattern set into a frozen one."); + //---------------------------------------------------------------------------- // Mapping of the PDLResultList and PDLModule //---------------------------------------------------------------------------- @@ -237,7 +349,7 @@ void mlir::python::populateRewriteSubmodule(nb::module_ &m) { .def( "freeze", [](PyPDLPatternModule &self) { - return new PyFrozenRewritePatternSet(mlirFreezeRewritePattern( + return PyFrozenRewritePatternSet(mlirFreezeRewritePattern( mlirRewritePatternSetFromPDLPatternModule(self.get()))); }, nb::keep_alive<0, 1>()) diff --git a/mlir/lib/CAPI/Transforms/Rewrite.cpp b/mlir/lib/CAPI/Transforms/Rewrite.cpp index c15a73b..46c329d 100644 --- 
a/mlir/lib/CAPI/Transforms/Rewrite.cpp +++ b/mlir/lib/CAPI/Transforms/Rewrite.cpp @@ -270,35 +270,16 @@ void mlirIRRewriterDestroy(MlirRewriterBase rewriter) { /// RewritePatternSet and FrozenRewritePatternSet API //===----------------------------------------------------------------------===// -static inline mlir::RewritePatternSet &unwrap(MlirRewritePatternSet module) { - assert(module.ptr && "unexpected null module"); - return *(static_cast<mlir::RewritePatternSet *>(module.ptr)); -} - -static inline MlirRewritePatternSet wrap(mlir::RewritePatternSet *module) { - return {module}; -} - -static inline mlir::FrozenRewritePatternSet * -unwrap(MlirFrozenRewritePatternSet module) { - assert(module.ptr && "unexpected null module"); - return static_cast<mlir::FrozenRewritePatternSet *>(module.ptr); -} - -static inline MlirFrozenRewritePatternSet -wrap(mlir::FrozenRewritePatternSet *module) { - return {module}; -} - -MlirFrozenRewritePatternSet mlirFreezeRewritePattern(MlirRewritePatternSet op) { - auto *m = new mlir::FrozenRewritePatternSet(std::move(unwrap(op))); - op.ptr = nullptr; +MlirFrozenRewritePatternSet +mlirFreezeRewritePattern(MlirRewritePatternSet set) { + auto *m = new mlir::FrozenRewritePatternSet(std::move(*unwrap(set))); + set.ptr = nullptr; return wrap(m); } -void mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet op) { - delete unwrap(op); - op.ptr = nullptr; +void mlirFrozenRewritePatternSetDestroy(MlirFrozenRewritePatternSet set) { + delete unwrap(set); + set.ptr = nullptr; } MlirLogicalResult @@ -319,33 +300,86 @@ mlirApplyPatternsAndFoldGreedilyWithOp(MlirOperation op, /// PatternRewriter API //===----------------------------------------------------------------------===// -inline mlir::PatternRewriter *unwrap(MlirPatternRewriter rewriter) { - assert(rewriter.ptr && "unexpected null rewriter"); - return static_cast<mlir::PatternRewriter *>(rewriter.ptr); +MlirRewriterBase mlirPatternRewriterAsBase(MlirPatternRewriter rewriter) { + return 
wrap(static_cast<mlir::RewriterBase *>(unwrap(rewriter))); } -inline MlirPatternRewriter wrap(mlir::PatternRewriter *rewriter) { - return {rewriter}; -} +//===----------------------------------------------------------------------===// +/// RewritePattern API +//===----------------------------------------------------------------------===// -MlirRewriterBase mlirPatternRewriterAsBase(MlirPatternRewriter rewriter) { - return wrap(static_cast<mlir::RewriterBase *>(unwrap(rewriter))); +namespace mlir { + +class ExternalRewritePattern : public mlir::RewritePattern { +public: + ExternalRewritePattern(MlirRewritePatternCallbacks callbacks, void *userData, + StringRef rootName, PatternBenefit benefit, + MLIRContext *context, + ArrayRef<StringRef> generatedNames) + : RewritePattern(rootName, benefit, context, generatedNames), + callbacks(callbacks), userData(userData) { + if (callbacks.construct) + callbacks.construct(userData); + } + + ~ExternalRewritePattern() { + if (callbacks.destruct) + callbacks.destruct(userData); + } + + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + return unwrap(callbacks.matchAndRewrite( + wrap(static_cast<const mlir::RewritePattern *>(this)), wrap(op), + wrap(&rewriter), userData)); + } + +private: + MlirRewritePatternCallbacks callbacks; + void *userData; +}; + +} // namespace mlir + +MlirRewritePattern mlirOpRewritePattenCreate( + MlirStringRef rootName, unsigned benefit, MlirContext context, + MlirRewritePatternCallbacks callbacks, void *userData, + size_t nGeneratedNames, MlirStringRef *generatedNames) { + std::vector<mlir::StringRef> generatedNamesVec; + generatedNamesVec.reserve(nGeneratedNames); + for (size_t i = 0; i < nGeneratedNames; ++i) { + generatedNamesVec.push_back(unwrap(generatedNames[i])); + } + return wrap(new mlir::ExternalRewritePattern( + callbacks, userData, unwrap(rootName), PatternBenefit(benefit), + unwrap(context), generatedNamesVec)); } 
//===----------------------------------------------------------------------===// -/// PDLPatternModule API +/// RewritePatternSet API //===----------------------------------------------------------------------===// -#if MLIR_ENABLE_PDL_IN_PATTERNMATCH -static inline mlir::PDLPatternModule *unwrap(MlirPDLPatternModule module) { - assert(module.ptr && "unexpected null module"); - return static_cast<mlir::PDLPatternModule *>(module.ptr); +MlirRewritePatternSet mlirRewritePatternSetCreate(MlirContext context) { + return wrap(new mlir::RewritePatternSet(unwrap(context))); +} + +void mlirRewritePatternSetDestroy(MlirRewritePatternSet set) { + delete unwrap(set); } -static inline MlirPDLPatternModule wrap(mlir::PDLPatternModule *module) { - return {module}; +void mlirRewritePatternSetAdd(MlirRewritePatternSet set, + MlirRewritePattern pattern) { + std::unique_ptr<mlir::RewritePattern> patternPtr( + const_cast<mlir::RewritePattern *>(unwrap(pattern))); + pattern.ptr = nullptr; + unwrap(set)->add(std::move(patternPtr)); } +//===----------------------------------------------------------------------===// +/// PDLPatternModule API +//===----------------------------------------------------------------------===// + +#if MLIR_ENABLE_PDL_IN_PATTERNMATCH MlirPDLPatternModule mlirPDLPatternModuleFromModule(MlirModule op) { return wrap(new mlir::PDLPatternModule( mlir::OwningOpRef<mlir::ModuleOp>(unwrap(op)))); @@ -363,22 +397,6 @@ mlirRewritePatternSetFromPDLPatternModule(MlirPDLPatternModule op) { return wrap(m); } -inline const mlir::PDLValue *unwrap(MlirPDLValue value) { - assert(value.ptr && "unexpected null PDL value"); - return static_cast<const mlir::PDLValue *>(value.ptr); -} - -inline MlirPDLValue wrap(const mlir::PDLValue *value) { return {value}; } - -inline mlir::PDLResultList *unwrap(MlirPDLResultList results) { - assert(results.ptr && "unexpected null PDL results"); - return static_cast<mlir::PDLResultList *>(results.ptr); -} - -inline MlirPDLResultList 
wrap(mlir::PDLResultList *results) { - return {results}; -} - MlirValue mlirPDLValueAsValue(MlirPDLValue value) { return wrap(unwrap(value)->dyn_cast<mlir::Value>()); } diff --git a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp index 2b7bdc9..11f866c 100644 --- a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp +++ b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp @@ -22,6 +22,7 @@ #include "mlir/IR/TypeRange.h" #include "mlir/IR/Value.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/STLExtras.h" #include <cstdint> #include <numeric> @@ -110,9 +111,7 @@ static Value calculateMemrefTotalSizeBytes(Location loc, MemRefType memrefType, {TypeAttr::get(memrefType.getElementType())})); IndexType indexType = builder.getIndexType(); - int64_t numElements = std::accumulate(memrefType.getShape().begin(), - memrefType.getShape().end(), int64_t{1}, - std::multiplies<int64_t>()); + int64_t numElements = llvm::product_of(memrefType.getShape()); emitc::ConstantOp numElementsValue = emitc::ConstantOp::create( builder, loc, indexType, builder.getIndexAttr(numElements)); diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp index 802691c..9bf9ca3 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/STLExtras.h" #include <numeric> @@ -70,8 +71,7 @@ TensorType inferReshapeExpandedType(TensorType inputType, // Calculate the product of all elements in 'newShape' except for the -1 // placeholder, which we discard by negating the result. 
- int64_t totalSizeNoPlaceholder = -std::accumulate( - newShape.begin(), newShape.end(), 1, std::multiplies<int64_t>()); + int64_t totalSizeNoPlaceholder = -llvm::product_of(newShape); // If there is a 0 component in 'newShape', resolve the placeholder as // 0. diff --git a/mlir/lib/Conversion/VectorToAMX/VectorToAMX.cpp b/mlir/lib/Conversion/VectorToAMX/VectorToAMX.cpp index 79c2f23..245a3ef 100644 --- a/mlir/lib/Conversion/VectorToAMX/VectorToAMX.cpp +++ b/mlir/lib/Conversion/VectorToAMX/VectorToAMX.cpp @@ -20,6 +20,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/DebugLog.h" #include <numeric> @@ -265,8 +266,7 @@ loadStoreFromTransfer(PatternRewriter &rewriter, if (isPacked) src = collapseLastDim(rewriter, src); int64_t rows = vecShape[0]; - int64_t cols = std::accumulate(vecShape.begin() + 1, vecShape.end(), 1, - std::multiplies<int64_t>()); + int64_t cols = llvm::product_of(vecShape.drop_front()); auto tileType = amx::TileType::get({rows, cols}, vecTy.getElementType()); Value zeroIndex = rewriter.createOrFold<arith::ConstantIndexOp>(loc, 0); @@ -336,8 +336,7 @@ static TypedValue<amx::TileType> loadTile(PatternRewriter &rewriter, ArrayRef<int64_t> shape = vecTy.getShape(); int64_t rows = shape[0]; - int64_t cols = std::accumulate(shape.begin() + 1, shape.end(), 1, - std::multiplies<int64_t>()); + int64_t cols = llvm::product_of(shape.drop_front()); auto tileType = amx::TileType::get({rows, cols}, vecTy.getElementType()); return amx::TileLoadOp::create(rewriter, loc, tileType, buf, diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index c45c45e..c9eba69 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -26,6 +26,7 @@ #include "mlir/IR/Builders.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include 
"llvm/ADT/STLExtras.h" namespace mlir { #define GEN_PASS_DEF_CONVERTVECTORTOSCF @@ -760,8 +761,7 @@ struct DecomposePrintOpConversion : public VectorToSCFPattern<vector::PrintOp> { if (vectorType.getRank() != 1) { // Flatten n-D vectors to 1D. This is done to allow indexing with a // non-constant value. - auto flatLength = std::accumulate(shape.begin(), shape.end(), 1, - std::multiplies<int64_t>()); + int64_t flatLength = llvm::product_of(shape); auto flatVectorType = VectorType::get({flatLength}, vectorType.getElementType()); value = vector::ShapeCastOp::create(rewriter, loc, flatVectorType, value); diff --git a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp index 9ead1d8..71687b1 100644 --- a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp +++ b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp @@ -23,6 +23,7 @@ #include "mlir/Dialect/XeGPU/IR/XeGPU.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/FormatVariadic.h" #include "mlir/IR/BuiltinTypes.h" @@ -774,9 +775,7 @@ struct ConvertXeGPUToXeVMPass if (rank < 1 || type.getNumElements() == 1) return elemType; // Otherwise, convert the vector to a flat vector type. 
- int64_t sum = - std::accumulate(type.getShape().begin(), type.getShape().end(), - int64_t{1}, std::multiplies<int64_t>()); + int64_t sum = llvm::product_of(type.getShape()); return VectorType::get(sum, elemType); }); typeConverter.addConversion([&](xegpu::TensorDescType type) -> Type { diff --git a/mlir/lib/Dialect/Arith/Utils/Utils.cpp b/mlir/lib/Dialect/Arith/Utils/Utils.cpp index b1fc9aa..f54baff 100644 --- a/mlir/lib/Dialect/Arith/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Arith/Utils/Utils.cpp @@ -351,9 +351,9 @@ Value createProduct(OpBuilder &builder, Location loc, ArrayRef<Value> values, Value one = ConstantOp::create(builder, loc, resultType, builder.getOneAttr(resultType)); ArithBuilder arithBuilder(builder, loc); - return std::accumulate( - values.begin(), values.end(), one, - [&arithBuilder](Value acc, Value v) { return arithBuilder.mul(acc, v); }); + return llvm::accumulate(values, one, [&arithBuilder](Value acc, Value v) { + return arithBuilder.mul(acc, v); + }); } /// Map strings to float types. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp index a50ddbe..624519f 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp @@ -55,16 +55,6 @@ static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) { return returnOp; } -/// Return the func::FuncOp called by `callOp`. 
-static func::FuncOp getCalledFunction(CallOpInterface callOp) { - SymbolRefAttr sym = - llvm::dyn_cast_if_present<SymbolRefAttr>(callOp.getCallableForCallee()); - if (!sym) - return nullptr; - return dyn_cast_or_null<func::FuncOp>( - SymbolTable::lookupNearestSymbolFrom(callOp, sym)); -} - LogicalResult mlir::bufferization::dropEquivalentBufferResults(ModuleOp module) { IRRewriter rewriter(module.getContext()); @@ -72,7 +62,8 @@ mlir::bufferization::dropEquivalentBufferResults(ModuleOp module) { DenseMap<func::FuncOp, DenseSet<func::CallOp>> callerMap; // Collect the mapping of functions to their call sites. module.walk([&](func::CallOp callOp) { - if (func::FuncOp calledFunc = getCalledFunction(callOp)) { + if (func::FuncOp calledFunc = + dyn_cast_or_null<func::FuncOp>(callOp.resolveCallable())) { callerMap[calledFunc].insert(callOp); } }); diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 19eba6b..b5f8dda 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2460,8 +2460,7 @@ static LogicalResult verifyDistributedType(Type expanded, Type distributed, << dDim << ")"; scales[i] = eDim / dDim; } - if (std::accumulate(scales.begin(), scales.end(), 1, - std::multiplies<int64_t>()) != warpSize) + if (llvm::product_of(scales) != warpSize) return op->emitOpError() << "incompatible distribution dimensions from " << expandedVecType << " to " << distributedVecType << " with warp size = " << warpSize; diff --git a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp index 88f531f..572b746 100644 --- a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp +++ b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/IR/Value.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include <numeric> @@ -118,8 +119,7 @@ bool 
WarpDistributionPattern::delinearizeLaneId( return false; sizes.push_back(large / small); } - if (std::accumulate(sizes.begin(), sizes.end(), 1, - std::multiplies<int64_t>()) != warpSize) + if (llvm::product_of(sizes) != warpSize) return false; AffineExpr s0, s1; diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 7f419a0..5edcc40b 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1593,6 +1593,39 @@ mlir::NVVM::IDArgPair CpAsyncBulkPrefetchOp::getIntrinsicIDAndArgs( return {id, std::move(args)}; } +mlir::NVVM::IDArgPair CpAsyncBulkGlobalToSharedClusterOp::getIntrinsicIDAndArgs( + Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { + auto thisOp = cast<NVVM::CpAsyncBulkGlobalToSharedClusterOp>(op); + llvm::SmallVector<llvm::Value *> args; + + // Fill the Intrinsic Args: dst, mbar, src, size. + args.push_back(mt.lookupValue(thisOp.getDstMem())); + args.push_back(mt.lookupValue(thisOp.getMbar())); + args.push_back(mt.lookupValue(thisOp.getSrcMem())); + args.push_back(mt.lookupValue(thisOp.getSize())); + + // Multicast mask, if available. + mlir::Value multicastMask = thisOp.getMulticastMask(); + const bool hasMulticastMask = static_cast<bool>(multicastMask); + llvm::Value *i16Unused = llvm::ConstantInt::get(builder.getInt16Ty(), 0); + args.push_back(hasMulticastMask ? mt.lookupValue(multicastMask) : i16Unused); + + // Cache hint, if available. + mlir::Value cacheHint = thisOp.getL2CacheHint(); + const bool hasCacheHint = static_cast<bool>(cacheHint); + llvm::Value *i64Unused = llvm::ConstantInt::get(builder.getInt64Ty(), 0); + args.push_back(hasCacheHint ? mt.lookupValue(cacheHint) : i64Unused); + + // Flag arguments for multicast and cachehint. 
+ args.push_back(builder.getInt1(hasMulticastMask)); + args.push_back(builder.getInt1(hasCacheHint)); + + llvm::Intrinsic::ID id = + llvm::Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster; + + return {id, std::move(args)}; +} + mlir::NVVM::IDArgPair CpAsyncBulkSharedCTAToGlobalOp::getIntrinsicIDAndArgs( Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { auto thisOp = cast<NVVM::CpAsyncBulkSharedCTAToGlobalOp>(op); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ShardingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/ShardingInterfaceImpl.cpp index f277c5f..0ae2a9c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ShardingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ShardingInterfaceImpl.cpp @@ -266,9 +266,8 @@ struct StructuredOpShardingInterface LinalgOp linalgOp = llvm::cast<LinalgOp>(op); SmallVector<utils::IteratorType> iteratorTypes = linalgOp.getIteratorTypesArray(); - unsigned reductionItersCount = std::accumulate( - iteratorTypes.begin(), iteratorTypes.end(), 0, - [](unsigned count, utils::IteratorType iter) { + unsigned reductionItersCount = llvm::accumulate( + iteratorTypes, 0u, [](unsigned count, utils::IteratorType iter) { return count + (iter == utils::IteratorType::reduction); }); shard::ReductionKind reductionKind = getReductionKindOfLinalgOp(linalgOp); diff --git a/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp b/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp index b663908..8c4f80f 100644 --- a/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp +++ b/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Quant/Utils/UniformSupport.h" #include "mlir/IR/BuiltinTypes.h" +#include "llvm/ADT/STLExtras.h" #include <numeric> using namespace mlir; @@ -76,9 +77,7 @@ UniformQuantizedPerAxisValueConverter::convert(DenseFPElementsAttr attr) { // using the right quantization parameters. 
int64_t flattenIndex = 0; auto shape = type.getShape(); - int64_t chunkSize = - std::accumulate(std::next(shape.begin(), quantizationDim + 1), - shape.end(), 1, std::multiplies<int64_t>()); + int64_t chunkSize = llvm::product_of(shape.drop_front(quantizationDim + 1)); Type newElementType = IntegerType::get(attr.getContext(), storageBitWidth); return attr.mapValues(newElementType, [&](const APFloat &old) { int chunkIndex = (flattenIndex++) / chunkSize; diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index 5511998..fe50865 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -400,7 +400,7 @@ LogicalResult spirv::CompositeConstructOp::verify() { return emitOpError("operand element type mismatch: expected to be ") << resultType.getElementType() << ", but provided " << elementType; } - unsigned totalCount = std::accumulate(sizes.begin(), sizes.end(), 0); + unsigned totalCount = llvm::sum_of(sizes); if (totalCount != cType.getNumElements()) return emitOpError("has incorrect number of operands: expected ") << cType.getNumElements() << ", but provided " << totalCount; diff --git a/mlir/lib/Dialect/Shard/IR/ShardOps.cpp b/mlir/lib/Dialect/Shard/IR/ShardOps.cpp index 08fccfa..135c033 100644 --- a/mlir/lib/Dialect/Shard/IR/ShardOps.cpp +++ b/mlir/lib/Dialect/Shard/IR/ShardOps.cpp @@ -1010,18 +1010,6 @@ static LogicalResult verifyInGroupDevice(Location loc, StringRef deviceName, return success(); } -template <typename It> -static auto product(It begin, It end) { - using ElementType = std::decay_t<decltype(*begin)>; - return std::accumulate(begin, end, static_cast<ElementType>(1), - std::multiplies<ElementType>()); -} - -template <typename R> -static auto product(R &&range) { - return product(adl_begin(range), adl_end(range)); -} - static LogicalResult verifyDimensionCompatibility(Location loc, int64_t expectedDimSize, int64_t resultDimSize, diff --git 
a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index c51b5e9..00f84bc 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -2368,9 +2368,10 @@ llvm::LogicalResult tosa::ReshapeOp::verify() { } } - int64_t newShapeElementsNum = std::accumulate( - shapeValues.begin(), shapeValues.end(), 1LL, - [](int64_t acc, int64_t dim) { return (dim > 0) ? acc * dim : acc; }); + int64_t newShapeElementsNum = + llvm::accumulate(shapeValues, int64_t(1), [](int64_t acc, int64_t dim) { + return (dim > 0) ? acc * dim : acc; + }); bool isStaticNewShape = llvm::all_of(shapeValues, [](int64_t s) { return s > 0; }); if ((isStaticNewShape && inputElementsNum != newShapeElementsNum) || diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp index d33ebe3..5786f53 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectResourceBlobManager.h" #include "mlir/IR/Matchers.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" using namespace mlir; @@ -375,8 +376,7 @@ llvm::APInt calculateReducedValue(const mlir::ElementsAttr &oldTensorAttr, for (int64_t reductionAxisVal = 1; reductionAxisVal < oldShape[reductionAxis]; ++reductionAxisVal) { - int64_t stride = std::accumulate(oldShape.begin() + reductionAxis + 1, - oldShape.end(), 1, std::multiplies<int>()); + int64_t stride = llvm::product_of(oldShape.drop_front(reductionAxis + 1)); int64_t index = indexAtOldTensor + stride * reductionAxisVal; reducedValue = OperationType::calcOneElement(reducedValue, oldTensor[index]); @@ -424,8 +424,7 @@ struct ReduceConstantOptimization : public OpRewritePattern<OperationType> { auto oldShape = shapedOldElementsValues.getShape(); auto newShape = resultType.getShape(); - auto newNumOfElements = std::accumulate(newShape.begin(), 
newShape.end(), 1, - std::multiplies<int>()); + int64_t newNumOfElements = llvm::product_of(newShape); llvm::SmallVector<APInt> newReducedTensor(newNumOfElements); for (int64_t reductionIndex = 0; reductionIndex < newNumOfElements; diff --git a/mlir/lib/Dialect/Utils/IndexingUtils.cpp b/mlir/lib/Dialect/Utils/IndexingUtils.cpp index e1648ab9..305b06eb 100644 --- a/mlir/lib/Dialect/Utils/IndexingUtils.cpp +++ b/mlir/lib/Dialect/Utils/IndexingUtils.cpp @@ -81,21 +81,10 @@ SmallVector<int64_t> mlir::computeElementwiseMul(ArrayRef<int64_t> v1, return computeElementwiseMulImpl(v1, v2); } -int64_t mlir::computeSum(ArrayRef<int64_t> basis) { - assert(llvm::all_of(basis, [](int64_t s) { return s > 0; }) && - "basis must be nonnegative"); - if (basis.empty()) - return 0; - return std::accumulate(basis.begin(), basis.end(), 1, std::plus<int64_t>()); -} - int64_t mlir::computeProduct(ArrayRef<int64_t> basis) { assert(llvm::all_of(basis, [](int64_t s) { return s > 0; }) && "basis must be nonnegative"); - if (basis.empty()) - return 1; - return std::accumulate(basis.begin(), basis.end(), 1, - std::multiplies<int64_t>()); + return llvm::product_of(basis); } int64_t mlir::linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) { @@ -158,19 +147,11 @@ SmallVector<AffineExpr> mlir::computeElementwiseMul(ArrayRef<AffineExpr> v1, } AffineExpr mlir::computeSum(MLIRContext *ctx, ArrayRef<AffineExpr> basis) { - if (basis.empty()) - return getAffineConstantExpr(0, ctx); - return std::accumulate(basis.begin(), basis.end(), - getAffineConstantExpr(0, ctx), - std::plus<AffineExpr>()); + return llvm::sum_of(basis, getAffineConstantExpr(0, ctx)); } AffineExpr mlir::computeProduct(MLIRContext *ctx, ArrayRef<AffineExpr> basis) { - if (basis.empty()) - return getAffineConstantExpr(1, ctx); - return std::accumulate(basis.begin(), basis.end(), - getAffineConstantExpr(1, ctx), - std::multiplies<AffineExpr>()); + return llvm::product_of(basis, getAffineConstantExpr(1, ctx)); } AffineExpr 
mlir::linearize(MLIRContext *ctx, ArrayRef<AffineExpr> offsets, diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp index 7b2734d..6e9118e 100644 --- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp @@ -374,11 +374,11 @@ mlir::composeReassociationIndices( if (consumerReassociations.empty()) return composedIndices; - size_t consumerDims = std::accumulate( - consumerReassociations.begin(), consumerReassociations.end(), 0, - [](size_t all, ReassociationIndicesRef indices) { - return all + indices.size(); - }); + size_t consumerDims = + llvm::accumulate(consumerReassociations, size_t(0), + [](size_t all, ReassociationIndicesRef indices) { + return all + indices.size(); + }); if (producerReassociations.size() != consumerDims) return std::nullopt; diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index a7e3ba8..58256b0 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -2496,8 +2496,7 @@ struct ToElementsOfBroadcast final : OpRewritePattern<ToElementsOp> { auto srcElems = vector::ToElementsOp::create( rewriter, toElementsOp.getLoc(), bcastOp.getSource()); - int64_t dstCount = std::accumulate(dstShape.begin(), dstShape.end(), 1, - std::multiplies<int64_t>()); + int64_t dstCount = llvm::product_of(dstShape); SmallVector<Value> replacements; replacements.reserve(dstCount); diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp index c5f22b2..0eba0b1 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp @@ -21,6 +21,7 @@ #include "mlir/IR/Location.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" +#include "llvm/ADT/STLExtras.h" #include <numeric> #define DEBUG_TYPE "vector-shape-cast-lowering" @@ 
-166,10 +167,7 @@ class ShapeCastOpRewritePattern : public OpRewritePattern<vector::ShapeCastOp> { const VectorType resultType = shapeCast.getResultVectorType(); const ArrayRef<int64_t> resultShape = resultType.getShape(); - const int64_t nSlices = - std::accumulate(sourceShape.begin(), sourceShape.begin() + sourceDim, 1, - std::multiplies<int64_t>()); - + const int64_t nSlices = llvm::product_of(sourceShape.take_front(sourceDim)); SmallVector<int64_t> extractIndex(sourceDim, 0); SmallVector<int64_t> insertIndex(resultDim, 0); Value result = ub::PoisonOp::create(rewriter, loc, resultType); diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp index 963b2c8..aa2dd89 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Vector/Utils/VectorUtils.h" #include "mlir/IR/Builders.h" #include "mlir/IR/TypeUtilities.h" +#include "llvm/ADT/STLExtras.h" #define DEBUG_TYPE "vector-drop-unit-dim" @@ -557,8 +558,7 @@ struct CastAwayConstantMaskLeadingOneDim // If any of the dropped unit dims has a size of `0`, the entire mask is a // zero mask, else the unit dim has no effect on the mask. 
int64_t flatLeadingSize = - std::accumulate(dimSizes.begin(), dimSizes.begin() + dropDim + 1, - static_cast<int64_t>(1), std::multiplies<int64_t>()); + llvm::product_of(dimSizes.take_front(dropDim + 1)); SmallVector<int64_t> newDimSizes = {flatLeadingSize}; newDimSizes.append(dimSizes.begin() + dropDim + 1, dimSizes.end()); diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp index b72d564..2c56a43 100644 --- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp +++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp @@ -52,8 +52,7 @@ mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) { // compute sgSize by multiply elements of laneLayout // e.g. for 2D layout, sgSize = laneLayout[0] * laneLayout[1] // e.g. for 1D layout, sgSize = laneLayout[0] - auto sgSize = std::accumulate(laneLayout.begin(), laneLayout.end(), 1, - std::multiplies<int64_t>()); + int64_t sgSize = llvm::product_of(laneLayout); // Case 1: regular loads/stores auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>(); diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index 8bcfa46..ce421f4 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Interfaces/FoldInterfaces.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" #include <numeric> @@ -1274,10 +1275,7 @@ LogicalResult OpTrait::impl::verifyValueSizeAttr(Operation *op, return op->emitOpError("'") << attrName << "' attribute cannot have negative elements"; - size_t totalCount = - std::accumulate(sizes.begin(), sizes.end(), 0, - [](unsigned all, int32_t one) { return all + one; }); - + size_t totalCount = llvm::sum_of(sizes, size_t(0)); if (totalCount != expectedCount) return op->emitOpError() << valueGroupName << " count (" << expectedCount diff --git a/mlir/lib/IR/OperationSupport.cpp 
b/mlir/lib/IR/OperationSupport.cpp index 394ac77..2a37f38 100644 --- a/mlir/lib/IR/OperationSupport.cpp +++ b/mlir/lib/IR/OperationSupport.cpp @@ -406,15 +406,13 @@ OperandRangeRange::OperandRangeRange(OperandRange operands, OperandRange OperandRangeRange::join() const { const OwnerT &owner = getBase(); ArrayRef<int32_t> sizeData = llvm::cast<DenseI32ArrayAttr>(owner.second); - return OperandRange(owner.first, - std::accumulate(sizeData.begin(), sizeData.end(), 0)); + return OperandRange(owner.first, llvm::sum_of(sizeData)); } OperandRange OperandRangeRange::dereference(const OwnerT &object, ptrdiff_t index) { ArrayRef<int32_t> sizeData = llvm::cast<DenseI32ArrayAttr>(object.second); - uint32_t startIndex = - std::accumulate(sizeData.begin(), sizeData.begin() + index, 0); + uint32_t startIndex = llvm::sum_of(sizeData.take_front(index)); return OperandRange(object.first + startIndex, *(sizeData.begin() + index)); } @@ -565,8 +563,7 @@ MutableOperandRange MutableOperandRangeRange::dereference(const OwnerT &object, ptrdiff_t index) { ArrayRef<int32_t> sizeData = llvm::cast<DenseI32ArrayAttr>(object.second.getValue()); - uint32_t startIndex = - std::accumulate(sizeData.begin(), sizeData.begin() + index, 0); + uint32_t startIndex = llvm::sum_of(sizeData.take_front(index)); return object.first.slice( startIndex, *(sizeData.begin() + index), MutableOperandRange::OperandSegment(index, object.second)); diff --git a/mlir/lib/IR/TypeUtilities.cpp b/mlir/lib/IR/TypeUtilities.cpp index d2d115e..e438631 100644 --- a/mlir/lib/IR/TypeUtilities.cpp +++ b/mlir/lib/IR/TypeUtilities.cpp @@ -104,8 +104,8 @@ LogicalResult mlir::verifyCompatibleShapes(TypeRange types1, TypeRange types2) { LogicalResult mlir::verifyCompatibleDims(ArrayRef<int64_t> dims) { if (dims.empty()) return success(); - auto staticDim = std::accumulate( - dims.begin(), dims.end(), dims.front(), [](auto fold, auto dim) { + auto staticDim = + llvm::accumulate(dims, dims.front(), [](auto fold, auto dim) { return 
ShapedType::isDynamic(dim) ? fold : dim; }); return success(llvm::all_of(dims, [&](auto dim) { diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp index 33fbd2a..42843ea 100644 --- a/mlir/lib/Rewrite/ByteCode.cpp +++ b/mlir/lib/Rewrite/ByteCode.cpp @@ -1835,8 +1835,7 @@ executeGetOperandsResults(RangeT values, Operation *op, unsigned index, return nullptr; ArrayRef<int32_t> segments = segmentAttr; - unsigned startIndex = - std::accumulate(segments.begin(), segments.begin() + index, 0); + unsigned startIndex = llvm::sum_of(segments.take_front(index)); values = values.slice(startIndex, *std::next(segments.begin(), index)); LDBG() << " * Extracting range[" << startIndex << ", " diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 5a3eb20..845a14f 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -922,8 +922,7 @@ llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall( assert(opBundleSizes.size() == opBundleTagsAttr.size() && "operand bundles and tags do not match"); - numOpBundleOperands = - std::accumulate(opBundleSizes.begin(), opBundleSizes.end(), size_t(0)); + numOpBundleOperands = llvm::sum_of(opBundleSizes); assert(numOpBundleOperands <= intrOp->getNumOperands() && "operand bundle operands is more than the number of operands"); diff --git a/mlir/test/python/rewrite.py b/mlir/test/python/rewrite.py new file mode 100644 index 0000000..acf7db2 --- /dev/null +++ b/mlir/test/python/rewrite.py @@ -0,0 +1,69 @@ +# RUN: %PYTHON %s 2>&1 | FileCheck %s + +from mlir.ir import * +from mlir.passmanager import * +from mlir.dialects.builtin import ModuleOp +from mlir.dialects import arith +from mlir.rewrite import * + + +def run(f): + print("\nTEST:", f.__name__) + f() + + +# CHECK-LABEL: TEST: testRewritePattern +@run +def testRewritePattern(): + def to_muli(op, rewriter): + with rewriter.ip: + new_op = arith.muli(op.operands[0], 
op.operands[1], loc=op.location) + rewriter.replace_op(op, new_op.owner) + + def constant_1_to_2(op, rewriter): + c = op.attributes["value"].value + if c != 1: + return True # failed to match + with rewriter.ip: + new_op = arith.constant(op.result.type, 2, loc=op.location) + rewriter.replace_op(op, [new_op]) + + with Context(): + patterns = RewritePatternSet() + patterns.add(arith.AddIOp, to_muli) + patterns.add(arith.ConstantOp, constant_1_to_2) + frozen = patterns.freeze() + + module = ModuleOp.parse( + r""" + module { + func.func @add(%a: i64, %b: i64) -> i64 { + %sum = arith.addi %a, %b : i64 + return %sum : i64 + } + } + """ + ) + + apply_patterns_and_fold_greedily(module, frozen) + # CHECK: %0 = arith.muli %arg0, %arg1 : i64 + # CHECK: return %0 : i64 + print(module) + + module = ModuleOp.parse( + r""" + module { + func.func @const() -> (i64, i64) { + %0 = arith.constant 1 : i64 + %1 = arith.constant 3 : i64 + return %0, %1 : i64, i64 + } + } + """ + ) + + apply_patterns_and_fold_greedily(module, frozen) + # CHECK: %c2_i64 = arith.constant 2 : i64 + # CHECK: %c3_i64 = arith.constant 3 : i64 + # CHECK: return %c2_i64, %c3_i64 : i64, i64 + print(module) diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 9690115..daae3c7 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -3513,9 +3513,9 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder( body << "(" << operandName << " ? 
1 : 0)"; } else if (operand.isVariadicOfVariadic()) { body << llvm::formatv( - "static_cast<int32_t>(std::accumulate({0}.begin(), {0}.end(), 0, " + "llvm::accumulate({0}, int32_t(0), " "[](int32_t curSum, ::mlir::ValueRange range) {{ return curSum + " - "static_cast<int32_t>(range.size()); }))", + "static_cast<int32_t>(range.size()); })", operandName); } else { body << "static_cast<int32_t>(" << getArgumentName(op, i) << ".size())"; diff --git a/orc-rt/include/orc-rt/SPSWrapperFunction.h b/orc-rt/include/orc-rt/SPSWrapperFunction.h index dc68822..46c08a0 100644 --- a/orc-rt/include/orc-rt/SPSWrapperFunction.h +++ b/orc-rt/include/orc-rt/SPSWrapperFunction.h @@ -42,12 +42,6 @@ private: static T &&from(T &&Arg) noexcept { return std::forward<T>(Arg); } }; - template <typename T> struct Serializable<T *> { - typedef ExecutorAddr serializable_type; - static ExecutorAddr to(T *Arg) { return ExecutorAddr::fromPtr(Arg); } - static T *from(ExecutorAddr A) { return A.toPtr<T *>(); } - }; - template <> struct Serializable<Error> { typedef SPSSerializableError serializable_type; static SPSSerializableError to(Error Err) { @@ -66,21 +60,6 @@ private: } }; - template <typename T> struct Serializable<Expected<T *>> { - typedef SPSSerializableExpected<ExecutorAddr> serializable_type; - static SPSSerializableExpected<ExecutorAddr> to(Expected<T *> Val) { - return SPSSerializableExpected<ExecutorAddr>( - Val ? Expected<ExecutorAddr>(ExecutorAddr::fromPtr(*Val)) - : Expected<ExecutorAddr>(Val.takeError())); - } - static Expected<T *> from(SPSSerializableExpected<ExecutorAddr> Val) { - if (auto Tmp = Val.toExpected()) - return Tmp->toPtr<T *>(); - else - return Tmp.takeError(); - } - }; - template <typename... Ts> struct DeserializableTuple; template <typename... 
Ts> struct DeserializableTuple<std::tuple<Ts...>> { diff --git a/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h b/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h index 77d3339..6dbc0c0 100644 --- a/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h +++ b/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h @@ -54,18 +54,10 @@ public: struct FinalizeRequest { struct Segment { - enum class ContentType : uint8_t { Uninitialized, ZeroFill, Regular }; - - Segment() = default; - Segment(void *Address, size_t Size, AllocGroup G, ContentType C) - : Address(Address), Size(Size), G(G), C(C) {} - - void *Address = nullptr; + AllocGroup AG; + char *Address = nullptr; size_t Size = 0; - AllocGroup G; - ContentType C = ContentType::Uninitialized; - char *data() { return reinterpret_cast<char *>(Address); } - size_t size() const { return Size; } + span<const char> Content; }; std::vector<Segment> Segments; diff --git a/orc-rt/include/orc-rt/SimplePackedSerialization.h b/orc-rt/include/orc-rt/SimplePackedSerialization.h index f60ccad..0f291c4 100644 --- a/orc-rt/include/orc-rt/SimplePackedSerialization.h +++ b/orc-rt/include/orc-rt/SimplePackedSerialization.h @@ -556,6 +556,26 @@ public: } }; +/// Allow SPSExectorAddr serialization to/from T*. +template <typename T> class SPSSerializationTraits<SPSExecutorAddr, T *> { +public: + static size_t size(T *const &P) { + return SPSArgList<SPSExecutorAddr>::size(ExecutorAddr::fromPtr(P)); + } + + static bool serialize(SPSOutputBuffer &OB, T *const &P) { + return SPSArgList<SPSExecutorAddr>::serialize(OB, ExecutorAddr::fromPtr(P)); + } + + static bool deserialize(SPSInputBuffer &IB, T *&P) { + ExecutorAddr Value; + if (!SPSArgList<SPSExecutorAddr>::deserialize(IB, Value)) + return false; + P = Value.toPtr<T *>(); + return true; + } +}; + /// Helper type for serializing Errors. /// /// llvm::Errors are move-only, and not inspectable except by consuming them. 
diff --git a/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp b/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp index 603ef8b..10cdcf5 100644 --- a/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp +++ b/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp @@ -31,36 +31,22 @@ struct SPSSimpleNativeMemoryMapSegment; template <> class SPSSerializationTraits<SPSSimpleNativeMemoryMapSegment, SimpleNativeMemoryMap::FinalizeRequest::Segment> { - using SPSType = SPSTuple<SPSExecutorAddr, uint64_t, SPSAllocGroup, uint8_t>; + using SPSType = + SPSTuple<SPSAllocGroup, SPSExecutorAddr, uint64_t, SPSSequence<char>>; public: static bool deserialize(SPSInputBuffer &IB, SimpleNativeMemoryMap::FinalizeRequest::Segment &S) { - using ContentType = - SimpleNativeMemoryMap::FinalizeRequest::Segment::ContentType; - + AllocGroup AG; ExecutorAddr Address; uint64_t Size; - AllocGroup G; - uint8_t C; - if (!SPSType::AsArgList::deserialize(IB, Address, Size, G, C)) + span<const char> Content; + if (!SPSType::AsArgList::deserialize(IB, AG, Address, Size, Content)) return false; - if (Size >= std::numeric_limits<size_t>::max()) + if (Size > std::numeric_limits<size_t>::max()) return false; - S.Address = Address.toPtr<void *>(); - S.Size = Size; - S.G = G; - S.C = static_cast<ContentType>(C); - switch (S.C) { - case ContentType::Uninitialized: - return true; - case ContentType::ZeroFill: - memset(reinterpret_cast<char *>(S.Address), 0, S.Size); - return true; - case ContentType::Regular: - // Read content directly into target address. - return IB.read(reinterpret_cast<char *>(S.Address), S.Size); - } + S = {AG, Address.toPtr<char *>(), static_cast<size_t>(Size), Content}; + return true; } }; @@ -138,10 +124,31 @@ void SimpleNativeMemoryMap::finalize(OnFinalizeCompleteFn &&OnComplete, // TODO: Record finalize segments for release. // std::vector<std::pair<void*, size_t>> FinalizeSegments; + // Check segment validity before proceeding. 
for (auto &S : FR.Segments) { - if (auto Err = hostOSMemoryProtect(S.Address, S.Size, S.G.getMemProt())) + + if (S.Content.size() > S.Size) { + return OnComplete(make_error<StringError>( + (std::ostringstream() + << "For segment [" << (void *)S.Address << ".." + << (void *)(S.Address + S.Size) << "), " + << " content size (" << std::hex << S.Content.size() + << ") exceeds segment size (" << S.Size << ")") + .str())); + } + + // Copy any requested content. + if (!S.Content.empty()) + memcpy(S.Address, S.Content.data(), S.Content.size()); + + // Zero-fill the rest of the section. + if (size_t ZeroFillSize = S.Size - S.Content.size()) + memset(S.Address + S.Content.size(), 0, ZeroFillSize); + + if (auto Err = hostOSMemoryProtect(S.Address, S.Size, S.AG.getMemProt())) return OnComplete(std::move(Err)); - switch (S.G.getMemLifetime()) { + + switch (S.AG.getMemLifetime()) { case MemLifetime::Standard: if (!Base || S.Address < Base) Base = S.Address; diff --git a/orc-rt/unittests/SPSWrapperFunctionTest.cpp b/orc-rt/unittests/SPSWrapperFunctionTest.cpp index ed085f2..81e5755 100644 --- a/orc-rt/unittests/SPSWrapperFunctionTest.cpp +++ b/orc-rt/unittests/SPSWrapperFunctionTest.cpp @@ -192,62 +192,6 @@ TEST(SPSWrapperFunctionUtilsTest, TransparentConversionExpectedFailureCase) { EXPECT_EQ(ErrMsg, "N is not a multiple of 2"); } -static void -round_trip_int_pointer_sps_wrapper(orc_rt_SessionRef Session, void *CallCtx, - orc_rt_WrapperFunctionReturn Return, - orc_rt_WrapperFunctionBuffer ArgBytes) { - SPSWrapperFunction<SPSExecutorAddr(SPSExecutorAddr)>::handle( - Session, CallCtx, Return, ArgBytes, - [](move_only_function<void(int32_t *)> Return, int32_t *P) { - Return(P); - }); -} - -TEST(SPSWrapperFunctionUtilsTest, TransparentConversionPointers) { - int X = 42; - int *P = nullptr; - SPSWrapperFunction<SPSExecutorAddr(SPSExecutorAddr)>::call( - DirectCaller(nullptr, round_trip_int_pointer_sps_wrapper), - [&](Expected<int32_t *> R) { P = cantFail(std::move(R)); }, &X); - - 
EXPECT_EQ(P, &X); -} - -TEST(SPSWrapperFunctionUtilsTest, TransparentConversionReferenceArguments) { - int X = 42; - int *P = nullptr; - SPSWrapperFunction<SPSExecutorAddr(SPSExecutorAddr)>::call( - DirectCaller(nullptr, round_trip_int_pointer_sps_wrapper), - [&](Expected<int32_t *> R) { P = cantFail(std::move(R)); }, - static_cast<int *const &>(&X)); - - EXPECT_EQ(P, &X); -} - -static void -expected_int_pointer_sps_wrapper(orc_rt_SessionRef Session, void *CallCtx, - orc_rt_WrapperFunctionReturn Return, - orc_rt_WrapperFunctionBuffer ArgBytes) { - SPSWrapperFunction<SPSExpected<SPSExecutorAddr>(SPSExecutorAddr)>::handle( - Session, CallCtx, Return, ArgBytes, - [](move_only_function<void(Expected<int32_t *>)> Return, int32_t *P) { - Return(P); - }); -} - -TEST(SPSWrapperFunctionUtilsTest, TransparentConversionExpectedPointers) { - int X = 42; - int *P = nullptr; - SPSWrapperFunction<SPSExpected<SPSExecutorAddr>(SPSExecutorAddr)>::call( - DirectCaller(nullptr, expected_int_pointer_sps_wrapper), - [&](Expected<Expected<int32_t *>> R) { - P = cantFail(cantFail(std::move(R))); - }, - &X); - - EXPECT_EQ(P, &X); -} - template <size_t N> struct SPSOpCounter {}; namespace orc_rt { diff --git a/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp b/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp index ebd9bc3..b7ef7f0 100644 --- a/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp +++ b/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp @@ -31,49 +31,32 @@ struct SPSSimpleNativeMemoryMapSegment; struct TestSNMMSegment : public SimpleNativeMemoryMap::FinalizeRequest::Segment { - enum TestSNMMSegmentContent { Uninitialized, ZeroFill }; - - TestSNMMSegment(void *Address, AllocGroup G, std::string Content) - : SimpleNativeMemoryMap::FinalizeRequest::Segment( - Address, Content.size(), G, ContentType::Regular), - Content(std::move(Content)) {} - - TestSNMMSegment(void *Address, size_t Size, AllocGroup G, - TestSNMMSegmentContent Content) + TestSNMMSegment(AllocGroup AG, char *Address, size_t 
Size, + std::vector<char> C = {}) : SimpleNativeMemoryMap::FinalizeRequest::Segment( - Address, Size, G, - Content == ZeroFill ? ContentType::ZeroFill - : ContentType::Uninitialized) {} + {AG, Address, Size, {}}), + OwnedContent(std::move(C)) { + this->Content = {OwnedContent.data(), OwnedContent.size()}; + } - std::string Content; + std::vector<char> OwnedContent; }; template <> class SPSSerializationTraits<SPSSimpleNativeMemoryMapSegment, TestSNMMSegment> { - using SPSType = SPSTuple<SPSExecutorAddr, uint64_t, SPSAllocGroup, uint8_t>; + using SPSType = + SPSTuple<SPSAllocGroup, SPSExecutorAddr, uint64_t, SPSSequence<char>>; public: static size_t size(const TestSNMMSegment &S) { - using ContentType = - SimpleNativeMemoryMap::FinalizeRequest::Segment::ContentType; - assert((S.C != ContentType::Regular || S.Size == S.Content.size())); - return SPSType::AsArgList::size(ExecutorAddr::fromPtr(S.Address), - static_cast<uint64_t>(S.Size), S.G, - static_cast<uint8_t>(S.C)) + - (S.C == ContentType::Regular ? 
S.Size : 0); + return SPSType::AsArgList::size(S.AG, ExecutorAddr::fromPtr(S.Address), + static_cast<uint64_t>(S.Size), S.Content); } static bool serialize(SPSOutputBuffer &OB, const TestSNMMSegment &S) { - using ContentType = - SimpleNativeMemoryMap::FinalizeRequest::Segment::ContentType; - assert((S.C != ContentType::Regular || S.Size == S.Content.size())); - if (!SPSType::AsArgList::serialize(OB, ExecutorAddr::fromPtr(S.Address), - static_cast<uint64_t>(S.Size), S.G, - static_cast<uint8_t>(S.C))) - return false; - if (S.C == ContentType::Regular) - return OB.write(S.Content.data(), S.Content.size()); - return true; + return SPSType::AsArgList::serialize( + OB, S.AG, ExecutorAddr::fromPtr(S.Address), + static_cast<uint64_t>(S.Size), S.Content); } }; @@ -207,30 +190,61 @@ TEST(SimpleNativeMemoryMap, FullPipelineForOneRWSegment) { std::future<Expected<Expected<void *>>> FinalizeKey; TestSNMMFinalizeRequest FR; - void *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. - reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Addr) + 64 * 1024); - uint64_t SentinelValue = 0; - - FR.Segments.push_back({FinalizeBase, 64 * 1024, - MemProt::Read | MemProt::Write, - TestSNMMSegment::ZeroFill}); + char *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. + reinterpret_cast<char *>(Addr) + 64 * 1024; + uint64_t SentinelValue1 = 0; // Read from pre-filled content + uint64_t SentinelValue2 = 0; // Written in finalize, read back during dealloc. + uint64_t SentinelValue3 = 42; // Read from zero-filled region. + + // Build initial content vector. + std::vector<char> Content; + Content.resize(sizeof(uint64_t) * 2); + memcpy(Content.data(), &SentinelValue3, sizeof(uint64_t)); + memcpy(Content.data() + sizeof(uint64_t), &SentinelValue1, sizeof(uint64_t)); + + FR.Segments.push_back({MemProt::Read | MemProt::Write, FinalizeBase, + 64 * 1024, std::move(Content)}); + + // Read initial content into Sentinel 1. 
+ FR.AAPs.push_back({ + *MakeAllocAction<SPSExecutorAddr, SPSExecutorAddr>::from( + read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue1), + ExecutorAddr::fromPtr(FinalizeBase)), + {} // No dealloc action. + }); + + // Write value in finalize action, then read back into Sentinel 2. FR.AAPs.push_back( {*MakeAllocAction<SPSExecutorAddr, uint64_t>::from( - write_value_sps_allocaction, ExecutorAddr::fromPtr(FinalizeBase), + write_value_sps_allocaction, + ExecutorAddr::fromPtr(FinalizeBase) + sizeof(uint64_t), uint64_t(42)), *MakeAllocAction<SPSExecutorAddr, SPSExecutorAddr>::from( - read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue), - ExecutorAddr::fromPtr(FinalizeBase))}); + read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue2), + ExecutorAddr::fromPtr(FinalizeBase) + sizeof(uint64_t))}); + + // Read first 64 bits of the zero-fill region. + FR.AAPs.push_back({ + *MakeAllocAction<SPSExecutorAddr, SPSExecutorAddr>::from( + read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue3), + ExecutorAddr::fromPtr(FinalizeBase) + sizeof(uint64_t) * 2), + {} // No dealloc action. 
+ }); + snmm_finalize(waitFor(FinalizeKey), SNMM.get(), std::move(FR)); void *FinalizeKeyAddr = cantFail(cantFail(FinalizeKey.get())); - EXPECT_EQ(SentinelValue, 0U); + EXPECT_EQ(SentinelValue1, 42U); + EXPECT_EQ(SentinelValue2, 0U); + EXPECT_EQ(SentinelValue3, 0U); std::future<Expected<Error>> DeallocResult; snmm_deallocate(waitFor(DeallocResult), SNMM.get(), FinalizeKeyAddr); cantFail(cantFail(DeallocResult.get())); - EXPECT_EQ(SentinelValue, 42); + EXPECT_EQ(SentinelValue1, 42U); + EXPECT_EQ(SentinelValue2, 42U); + EXPECT_EQ(SentinelValue3, 0U); std::future<Expected<Error>> ReleaseResult; snmm_release(waitFor(ReleaseResult), SNMM.get(), Addr); @@ -248,13 +262,13 @@ TEST(SimpleNativeMemoryMap, ReserveFinalizeShutdown) { std::future<Expected<Expected<void *>>> FinalizeKey; TestSNMMFinalizeRequest FR; - void *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. - reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Addr) + 64 * 1024); + char *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. + reinterpret_cast<char *>(Addr) + 64 * 1024; uint64_t SentinelValue = 0; - FR.Segments.push_back({FinalizeBase, 64 * 1024, - MemProt::Read | MemProt::Write, - TestSNMMSegment::ZeroFill}); + FR.Segments.push_back( + {MemProt::Read | MemProt::Write, FinalizeBase, 64 * 1024}); + FR.AAPs.push_back( {*MakeAllocAction<SPSExecutorAddr, uint64_t>::from( write_value_sps_allocaction, ExecutorAddr::fromPtr(FinalizeBase), @@ -285,13 +299,13 @@ TEST(SimpleNativeMemoryMap, ReserveFinalizeDetachShutdown) { std::future<Expected<Expected<void *>>> FinalizeKey; TestSNMMFinalizeRequest FR; - void *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. - reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Addr) + 64 * 1024); + char *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. 
+ reinterpret_cast<char *>(Addr) + 64 * 1024; uint64_t SentinelValue = 0; - FR.Segments.push_back({FinalizeBase, 64 * 1024, - MemProt::Read | MemProt::Write, - TestSNMMSegment::ZeroFill}); + FR.Segments.push_back( + {MemProt::Read | MemProt::Write, FinalizeBase, 64 * 1024}); + FR.AAPs.push_back( {*MakeAllocAction<SPSExecutorAddr, uint64_t>::from( write_value_sps_allocaction, ExecutorAddr::fromPtr(FinalizeBase), diff --git a/orc-rt/unittests/SimplePackedSerializationTest.cpp b/orc-rt/unittests/SimplePackedSerializationTest.cpp index c3df499..17f0e9c 100644 --- a/orc-rt/unittests/SimplePackedSerializationTest.cpp +++ b/orc-rt/unittests/SimplePackedSerializationTest.cpp @@ -169,6 +169,12 @@ TEST(SimplePackedSerializationTest, StdOptionalValueSerialization) { blobSerializationRoundTrip<SPSOptional<int64_t>>(Value); } +TEST(SimplePackedSerializationTest, Pointers) { + int X = 42; + int *P = &X; + blobSerializationRoundTrip<SPSExecutorAddr>(P); +} + TEST(SimplePackedSerializationTest, ArgListSerialization) { using BAL = SPSArgList<bool, int32_t, SPSString>; |