238 files changed, 7060 insertions, 3926 deletions
diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index efdc42d..c49fd1d 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1096,8 +1096,8 @@ clang:openmp: - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** + - clang/include/clang/Analysis/Analyses/LifetimeSafety* + - clang/lib/Analysis/LifetimeSafety* - clang/unittests/Analysis/LifetimeSafety* - clang/test/Sema/*lifetime-safety* - clang/test/Sema/*lifetime-analysis* diff --git a/.github/workflows/release-binaries-save-stage/action.yml b/.github/workflows/release-binaries-save-stage/action.yml deleted file mode 100644 index 84ccf98..0000000 --- a/.github/workflows/release-binaries-save-stage/action.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Save Stage -description: >- - Upload the source and binary directories from a build stage so that they - can be re-used in the next stage. This action is used to the release - binaries workflow into multiple stages to avoid the 6 hour timeout on - the GitHub hosted runners. -inputs: - build-prefix: - description: "Directory containing the build directory." - required: true - type: 'string' - -permissions: - contents: read - -runs: - using: "composite" - steps: - # We need to create an archive of the build directory, because it has too - # many files to upload. - - name: Package Build and Source Directories - shell: bash - run: | - # Remove .git/config to avoid leaking GITHUB_TOKEN stored there. - # See https://unit42.paloaltonetworks.com/github-repo-artifacts-leak-tokens/ - rm -Rf .git/config - # Windows does not support symlinks, so we need to dereference them. - tar --exclude build/ ${{ (runner.os == 'Windows' && '-h') || '' }} -c . | zstd -T0 -c > ../llvm-project.tar.zst - mv ../llvm-project.tar.zst . - tar -C ${{ inputs.build-prefix }} -c build/ | zstd -T0 -c > build.tar.zst - - - name: Upload Stage 1 Source - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: ${{ runner.os }}-${{ runner.arch }}-${{ github.job }}-source - path: llvm-project.tar.zst - retention-days: 2 - - - name: Upload Stage 1 Build Dir - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: ${{ runner.os}}-${{ runner.arch }}-${{ github.job }}-build - path: build.tar.zst - retention-days: 2 diff --git a/.github/workflows/release-binaries-setup-stage/action.yml b/.github/workflows/release-binaries-setup-stage/action.yml deleted file mode 100644 index 475a25f..0000000 --- a/.github/workflows/release-binaries-setup-stage/action.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Setup Stage -description: >- - Setup the next stage of the release binaries workflow. This sets up the - environment correctly for a new stage of the release binaries workflow - and also restores the source and build directory from the previous stage. - -inputs: - previous-artifact: - description: >- - A unique descriptor for the artifact from the previous stage. This will - be used to construct the final artifact pattern, which is: - $RUNNER_OS-$RUNNER_ARCH-$PREVIOUS_ARTIFACT-* - required: false - type: 'string' - -outputs: - build-prefix: - description: "Directory containing the build directory." 
- value: ${{ steps.build-prefix.outputs.build-prefix }} - -runs: - using: "composite" - steps: - - name: Install Ninja - uses: llvm/actions/install-ninja@a1ea791b03c8e61f53a0e66f2f73db283aa0f01e # main - - - name: Setup Windows - if: startsWith(runner.os, 'Windows') - uses: llvm/actions/setup-windows@main - with: - arch: amd64 - - - name: Set Build Prefix - id: build-prefix - shell: bash - run: | - build_prefix=`pwd` - if [ "${{ runner.os }}" = "Linux" ]; then - sudo chown $USER:$USER /mnt/ - build_prefix=/mnt/ - fi - echo "build-prefix=$build_prefix" >> $GITHUB_OUTPUT - - - name: Download Previous Stage Artifact - if: ${{ inputs.previous-artifact }} - id: download - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 - with: - pattern: ${{ runner.os }}-${{ runner.arch }}-${{ inputs.previous-artifact }}-* - merge-multiple: true - - - name: Unpack Artifact - if: ${{ steps.download.outputs.download-path }} - shell: bash - run: | - tar --zstd -xf llvm-project.tar.zst - rm llvm-project.tar.zst - tar --zstd -C ${{ steps.build-prefix.outputs.build-prefix}} -xf build.tar.zst - rm build.tar.zst diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index cba48e4..83969b5 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -194,40 +194,30 @@ jobs: runs-on: ${{ needs.prepare.outputs.build-runs-on }} steps: - - name: Checkout Actions - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }} - sparse-checkout: | - .github/workflows/ - sparse-checkout-cone-mode: false - # Check out outside of working directory so the source checkout doesn't - # remove it. - path: workflows - - # actions/checkout does not support paths outside of the GITHUB_WORKSPACE. - # Also, anything that we put inside of GITHUB_WORKSPACE will be overwritten - # by future actions/checkout steps. Therefore, in order to checkout the - # latest actions from main, we need to first checkout out the actions inside of - # GITHUB_WORKSPACE (see previous step), then use actions/checkout to checkout - # the code being built and the move the actions from main back into GITHUB_WORKSPACE, - # becasue the uses on composite actions only reads workflows from inside GITHUB_WORKSPACE. - - shell: bash - run: mv workflows ../workflows-main - - name: Checkout LLVM uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: ref: ${{ needs.prepare.outputs.ref }} - - name: Copy main workflows - shell: bash - run: | - mv ../workflows-main . + - name: Install Ninja + uses: llvm/actions/install-ninja@a1ea791b03c8e61f53a0e66f2f73db283aa0f01e # main + + - name: Setup Windows + if: startsWith(runner.os, 'Windows') + uses: llvm/actions/setup-windows@main + with: + arch: amd64 - - name: Setup Stage + - name: Set Build Prefix id: setup-stage - uses: ./workflows-main/.github/workflows/release-binaries-setup-stage + shell: bash + run: | + build_prefix=`pwd` + if [ "${{ runner.os }}" = "Linux" ]; then + sudo chown $USER:$USER /mnt/ + build_prefix=/mnt/ + fi + echo "build-prefix=$build_prefix" >> $GITHUB_OUTPUT - name: Configure id: build @@ -258,17 +248,11 @@ jobs: path: | ${{ needs.prepare.outputs.release-binary-filename }} - # Clean up some build files to reduce size of artifact. - - name: Clean Up Build Directory - shell: bash + - name: Run Tests + # These almost always fail so don't let them fail the build and prevent the uploads. 
+ continue-on-error: true run: | - find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname ${{ needs.prepare.outputs.release-binary-filename }} -delete - find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname _CPack_Packages -prune -exec rm -r {} + - - - name: Save Stage - uses: ./workflows-main/.github/workflows/release-binaries-save-stage - with: - build-prefix: ${{ steps.setup-stage.outputs.build-prefix }} + ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-check-all upload-release-binaries: name: "Upload Release Binaries" @@ -327,31 +311,3 @@ jobs: --release ${{ needs.prepare.outputs.release-version }} \ upload \ --files ${{ needs.prepare.outputs.release-binary-filename }}* - - test-release: - name: "Test Release" - needs: - - prepare - - build-release-package - if: >- - github.repository_owner == 'llvm' - runs-on: ${{ needs.prepare.outputs.test-runs-on }} - steps: - - name: Checkout Actions - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }} - sparse-checkout: | - .github/workflows/ - sparse-checkout-cone-mode: false - path: workflows - - name: Setup Stage - id: setup-stage - uses: ./workflows/.github/workflows/release-binaries-setup-stage - with: - previous-artifact: build-release-package - - - name: Run Tests - shell: bash - run: | - ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-check-all diff --git a/clang-tools-extra/clang-doc/JSONGenerator.cpp b/clang-tools-extra/clang-doc/JSONGenerator.cpp index 26794a5..6fba211 100644 --- a/clang-tools-extra/clang-doc/JSONGenerator.cpp +++ b/clang-tools-extra/clang-doc/JSONGenerator.cpp @@ -582,11 +582,10 @@ static SmallString<16> determineFileName(Info *I, SmallString<128> &Path) { if (I->IT == InfoType::IT_record) { auto *RecordSymbolInfo = static_cast<SymbolInfo *>(I); FileName = RecordSymbolInfo->MangledName; - } else if (I->IT == InfoType::IT_namespace && I->Name != "") - // Serialize the global namespace as index.json - FileName = I->Name; + } else if (I->USR == GlobalNamespaceID) + FileName = "index"; else - FileName = I->getFileBaseName(); + FileName = I->Name; sys::path::append(Path, FileName + ".json"); return FileName; } diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h index 2a75f89..d8c2b9c 100644 --- a/clang-tools-extra/clang-doc/Representation.h +++ b/clang-tools-extra/clang-doc/Representation.h @@ -30,6 +30,9 @@ namespace doc { // SHA1'd hash of a USR. using SymbolID = std::array<uint8_t, 20>; +constexpr SymbolID GlobalNamespaceID = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + struct BaseRecordInfo; struct EnumInfo; struct FunctionInfo; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-bounds-constant-array-index-c++03.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-bounds-constant-array-index-c++03.cpp index ad1cc5ab..fe5b5b1 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-bounds-constant-array-index-c++03.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-bounds-constant-array-index-c++03.cpp @@ -1,7 +1,5 @@ // RUN: %check_clang_tidy -std=c++98-or-later %s cppcoreguidelines-pro-bounds-constant-array-index %t -// Note: this test expects no diagnostics, but FileCheck cannot handle that, -// hence the use of | count 0. 
template <int index> struct B { int get() { // The next line used to crash the check (in C++03 mode only). diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 99aa545..65b086c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -180,6 +180,9 @@ C Language Changes C2y Feature Support ^^^^^^^^^^^^^^^^^^^ +- No longer triggering ``-Wstatic-in-inline`` in C2y mode; use of a static + function or variable within an extern inline function is no longer a + constraint per `WG14 N3622 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3622.txt>`_. - Clang now supports `N3355 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3355.htm>`_ Named Loops. C23 Feature Support diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index e1a4005..573cc72 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -7160,6 +7160,18 @@ public: /// Return original type of the base expression for array section. static QualType getBaseOriginalType(const Expr *Base); + /// Return the effective 'element' type of this array section. As the array + /// section itself returns a collection of elements (closer to its `getBase` + /// type), this is only useful for figuring out the effective type of this if + /// it were a normal Array subscript expr. + QualType getElementType() const; + + /// Returns the effective 'type' of the base of this array section. This + /// should be the array/pointer type that this operates on. Just + /// getBase->getType isn't sufficient, since it doesn't look through existing + /// Array sections to figure out the actual 'base' of this. + QualType getBaseType() const; + static bool classof(const Stmt *T) { return T->getStmtClass() == ArraySectionExprClass; } diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h index f02969e..229d16c 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h @@ -12,7 +12,8 @@ #include "clang/AST/DeclCXX.h" -namespace clang ::lifetimes { +namespace clang { +namespace lifetimes { /// Returns the most recent declaration of the method to ensure all /// lifetime-bound attributes from redeclarations are considered. @@ -37,7 +38,7 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); /// lifetimebound, either due to an explicit lifetimebound attribute on the /// method or because it's a normal assignment operator. bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); - -} // namespace clang::lifetimes +} // namespace lifetimes +} // namespace clang #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h new file mode 100644 index 0000000..e54fc26 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -0,0 +1,183 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the entry point for a dataflow-based static analysis +// that checks for C++ lifetime violations. 
+//
+// The analysis is based on the concepts of "origins" and "loans" to track
+// pointer lifetimes and detect issues like use-after-free and dangling
+// pointers. See the RFC for more details:
+// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <memory>
+
+namespace clang::lifetimes {
+
+/// Enum to track the confidence level of a potential error.
+enum class Confidence : uint8_t {
+  None,
+  Maybe,   // Reported as a potential error (-Wlifetime-safety-strict)
+  Definite // Reported as a definite error (-Wlifetime-safety-permissive)
+};
+
+enum class LivenessKind : uint8_t {
+  Dead,  // Not alive
+  Maybe, // Live on some path but not all paths (may-be-live)
+  Must   // Live on all paths (must-be-live)
+};
+
+class LifetimeSafetyReporter {
+public:
+  LifetimeSafetyReporter() = default;
+  virtual ~LifetimeSafetyReporter() = default;
+
+  virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr,
+                                  SourceLocation FreeLoc,
+                                  Confidence Confidence) {}
+};
+
+/// The main entry point for the analysis.
+void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC,
+                               LifetimeSafetyReporter *Reporter);
+
+namespace internal {
+// Forward declarations of internal types.
+class Fact;
+class FactManager;
+class LoanPropagationAnalysis;
+class ExpiredLoansAnalysis;
+class LiveOriginAnalysis;
+struct LifetimeFactory;
+
+/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type.
+/// Used for giving ID to loans and origins.
+template <typename Tag> struct ID {
+  uint32_t Value = 0;
+
+  bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; }
+  bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); }
+  bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; }
+  ID<Tag> operator++(int) {
+    ID<Tag> Tmp = *this;
+    ++Value;
+    return Tmp;
+  }
+  void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+    IDBuilder.AddInteger(Value);
+  }
+};
+
+using LoanID = ID<struct LoanTag>;
+using OriginID = ID<struct OriginTag>;
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) {
+  return OS << ID.Value;
+}
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) {
+  return OS << ID.Value;
+}
+
+// Using LLVM's immutable collections is efficient for dataflow analysis
+// as it avoids deep copies during state transitions.
+// TODO(opt): Consider using a bitset to represent the set of loans.
+using LoanSet = llvm::ImmutableSet<LoanID>;
+using OriginSet = llvm::ImmutableSet<OriginID>;
+using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a
+/// specific lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+/// Runs the lifetime safety analysis and allows querying its results. It
+/// encapsulates the various dataflow analyses.
+class LifetimeSafetyAnalysis {
+public:
+  LifetimeSafetyAnalysis(AnalysisDeclContext &AC,
+                         LifetimeSafetyReporter *Reporter);
+  ~LifetimeSafetyAnalysis();
+
+  void run();
+
+  /// Returns the set of loans an origin holds at a specific program point.
+  LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const;
+
+  /// Returns the set of origins that are live at a specific program point,
+  /// along with the confidence level of their liveness.
+  ///
+  /// An origin is considered live if there are potential future uses of that
+  /// origin after the given program point. The confidence level indicates
+  /// whether the origin is definitely live (Definite) due to being dominated
+  /// by a set of uses, or only possibly live (Maybe) on some but not all
+  /// control flow paths.
+  std::vector<std::pair<OriginID, LivenessKind>>
+  getLiveOriginsAtPoint(ProgramPoint PP) const;
+
+  /// Finds the OriginID for a given declaration.
+  /// Returns a null optional if not found.
+  std::optional<OriginID> getOriginIDForDecl(const ValueDecl *D) const;
+
+  /// Finds the LoanIDs for the loans created with the specific variable as
+  /// their Path.
+  std::vector<LoanID> getLoanIDForVar(const VarDecl *VD) const;
+
+  /// Retrieves program points that were specially marked in the source code
+  /// for testing.
+  ///
+  /// The analysis recognizes special function calls of the form
+  /// `void("__lifetime_test_point_<name>")` as test points. This method returns
+  /// a map from the annotation string (<name>) to the corresponding
+  /// `ProgramPoint`. This allows test harnesses to query the analysis state at
+  /// user-defined locations in the code.
+  /// \note This is intended for testing only.
+  llvm::StringMap<ProgramPoint> getTestPoints() const;
+
+private:
+  AnalysisDeclContext &AC;
+  LifetimeSafetyReporter *Reporter;
+  std::unique_ptr<LifetimeFactory> Factory;
+  std::unique_ptr<FactManager> FactMgr;
+  std::unique_ptr<LoanPropagationAnalysis> LoanPropagation;
+  std::unique_ptr<LiveOriginAnalysis> LiveOrigins;
+};
+} // namespace internal
+} // namespace clang::lifetimes
+
+namespace llvm {
+template <typename Tag>
+struct DenseMapInfo<clang::lifetimes::internal::ID<Tag>> {
+  using ID = clang::lifetimes::internal::ID<Tag>;
+
+  static inline ID getEmptyKey() {
+    return {DenseMapInfo<uint32_t>::getEmptyKey()};
+  }
+
+  static inline ID getTombstoneKey() {
+    return {DenseMapInfo<uint32_t>::getTombstoneKey()};
+  }
+
+  static unsigned getHashValue(const ID &Val) {
+    return DenseMapInfo<uint32_t>::getHashValue(Val.Value);
+  }
+
+  static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; }
+};
+} // namespace llvm
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h
deleted file mode 100644
index 03636be..0000000
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines and enforces the lifetime safety policy.
It detects -// use-after-free errors by examining loan expiration points and checking if -// any live origins hold the expired loans. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" - -namespace clang::lifetimes::internal { - -/// Runs the lifetime checker, which detects use-after-free errors by -/// examining loan expiration points and checking if any live origins hold -/// the expired loan. -void runLifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, - const LiveOriginsAnalysis &LiveOrigins, - const FactManager &FactMgr, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter); - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h deleted file mode 100644 index 6a90aeb..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h +++ /dev/null @@ -1,232 +0,0 @@ -//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines Facts, which are atomic lifetime-relevant events (such as -// loan issuance, loan expiration, origin flow, and use), and the FactManager, -// which manages the storage and retrieval of facts for each CFG block. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include <cstdint> - -namespace clang::lifetimes::internal { -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). - Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - /// This can also optionally kill the destination origin before flowing into - /// it. Otherwise, the source's loan set is merged into the destination's - /// loan set. - OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, - /// An origin is used (eg. appears as l-value expression like DeclRefExpr). - Use, - /// A marker for a specific point in the code, for testing. 
- TestPoint, - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template <typename T> const T *getAs() const { - if (T::classof(this)) - return static_cast<const T *>(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const; -}; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const override; -}; - -class ExpireFact : public Fact { - LoanID LID; - SourceLocation ExpiryLoc; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID, SourceLocation ExpiryLoc) - : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} - - LoanID getLoanID() const { return LID; } - SourceLocation getExpiryLoc() const { return ExpiryLoc; } - - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const override; -}; - -class OriginFlowFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - // True if the destination origin should be killed (i.e., its current loans - // cleared) before the source origin's loans are flowed into it. - bool KillDest; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::OriginFlow; - } - - OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) - : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), - KillDest(KillDest) {} - - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - bool getKillDest() const { return KillDest; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override; -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override; -}; - -class UseFact : public Fact { - const Expr *UseExpr; - // True if this use is a write operation (e.g., left-hand side of assignment). - // Write operations are exempted from use-after-free checks. - bool IsWritten = false; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. 
- return const_cast<OriginManager &>(OM).get(*UseExpr); - } - const Expr *getUseExpr() const { return UseExpr; } - void markAsWritten() { IsWritten = true; } - bool isWritten() const { return IsWritten; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override; -}; - -/// A dummy-fact used to mark a specific point in the code for testing. -/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. -class TestPointFact : public Fact { - StringRef Annotation; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } - - explicit TestPointFact(StringRef Annotation) - : Fact(Kind::TestPoint), Annotation(Annotation) {} - - StringRef getAnnotation() const { return Annotation; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const override; -}; - -class FactManager { -public: - llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template <typename FactType, typename... Args> - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate<FactType>(); - return new (Mem) FactType(std::forward<Args>(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; - - /// Retrieves program points that were specially marked in the source code - /// for testing. - /// - /// The analysis recognizes special function calls of the form - /// `void("__lifetime_test_point_<name>")` as test points. This method returns - /// a map from the annotation string (<name>) to the corresponding - /// `ProgramPoint`. This allows test harnesses to query the analysis state at - /// user-defined locations in the code. - /// \note This is intended for testing only. - llvm::StringMap<ProgramPoint> getTestPoints() const; - - LoanManager &getLoanMgr() { return LoanMgr; } - const LoanManager &getLoanMgr() const { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - const OriginManager &getOriginMgr() const { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h deleted file mode 100644 index 5e58abe..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ /dev/null @@ -1,106 +0,0 @@ -//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the FactsGenerator, which traverses the AST to generate -// lifetime-relevant facts (such as loan issuance, expiration, origin flow, -// and use) from CFG statements. 
These facts are used by the dataflow analyses -// to track pointer lifetimes and detect use-after-free errors. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H - -#include "clang/AST/StmtVisitor.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/SmallVector.h" - -namespace clang::lifetimes::internal { - -class FactsGenerator : public ConstStmtVisitor<FactsGenerator> { - using Base = ConstStmtVisitor<FactsGenerator>; - -public: - FactsGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run(); - - void VisitDeclStmt(const DeclStmt *DS); - void VisitDeclRefExpr(const DeclRefExpr *DRE); - void VisitCXXConstructExpr(const CXXConstructExpr *CCE); - void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE); - void VisitCallExpr(const CallExpr *CE); - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N); - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE); - void VisitUnaryOperator(const UnaryOperator *UO); - void VisitReturnStmt(const ReturnStmt *RS); - void VisitBinaryOperator(const BinaryOperator *BO); - void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); - void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); - void VisitInitListExpr(const InitListExpr *ILE); - void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); - -private: - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt); - - void handleGSLPointerConstruction(const CXXConstructExpr *CCE); - - /// Checks if a call-like expression creates a borrow by passing a value to a - /// reference parameter, creating an IssueFact if it does. - /// \param IsGslConstruction True if this is a GSL construction where all - /// argument origins should flow to the returned origin. - void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef<const Expr *> Args, - bool IsGslConstruction = false); - - template <typename Destination, typename Source> - void flowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>( - DestOID, SrcOID, /*KillDest=*/false)); - } - - template <typename Destination, typename Source> - void killAndFlowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact<OriginFlowFact>(DestOID, SrcOID, /*KillDest=*/true)); - } - - /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. - /// If so, creates a `TestPointFact` and returns true. - bool handleTestPoint(const CXXFunctionalCastExpr *FCE); - - void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr); - - // A DeclRefExpr will be treated as a use of the referenced decl. It will be - // checked for use-after-free unless it is later marked as being written to - // (e.g. on the left-hand side of an assignment). 
- void handleUse(const DeclRefExpr *DRE); - - void markUseAsWrite(const DeclRefExpr *DRE); - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector<Fact *> CurrentBlockFacts; - // To distinguish between reads and writes for use-after-free checks, this map - // stores the `UseFact` for each `DeclRefExpr`. We initially identify all - // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use - // corresponding to the left-hand side is updated to be a "write", thereby - // exempting it from the check. - llvm::DenseMap<const DeclRefExpr *, UseFact *> UseFacts; -}; - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h deleted file mode 100644 index 91ffbb1..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h +++ /dev/null @@ -1,87 +0,0 @@ -//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the main entry point and orchestrator for the C++ Lifetime -// Safety Analysis. It coordinates the entire analysis pipeline: fact -// generation, loan propagation, live origins analysis, and enforcement of -// lifetime safety policy. -// -// The analysis is based on the concepts of "origins" and "loans" to track -// pointer lifetimes and detect issues like use-after-free and dangling -// pointers. See the RFC for more details: -// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "clang/Analysis/AnalysisDeclContext.h" - -namespace clang::lifetimes { - -/// Enum to track the confidence level of a potential error. -enum class Confidence : uint8_t { - None, - Maybe, // Reported as a potential error (-Wlifetime-safety-strict) - Definite // Reported as a definite error (-Wlifetime-safety-permissive) -}; - -class LifetimeSafetyReporter { -public: - LifetimeSafetyReporter() = default; - virtual ~LifetimeSafetyReporter() = default; - - virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, - SourceLocation FreeLoc, - Confidence Confidence) {} -}; - -/// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - -namespace internal { -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - OriginLoanMap::Factory OriginMapFactory{/*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{/*canonicalize=*/false}; - LivenessMap::Factory LivenessMapFactory{/*canonicalize=*/false}; -}; - -/// Running the lifetime safety analysis and querying its results. 
It -/// encapsulates the various dataflow analyses. -class LifetimeSafetyAnalysis { -public: - LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - - void run(); - - /// \note These are provided only for testing purposes. - LoanPropagationAnalysis &getLoanPropagation() const { - return *LoanPropagation; - } - LiveOriginsAnalysis &getLiveOrigins() const { return *LiveOrigins; } - FactManager &getFactManager() { return FactMgr; } - -private: - AnalysisDeclContext &AC; - LifetimeSafetyReporter *Reporter; - LifetimeFactory Factory; - FactManager FactMgr; - std::unique_ptr<LiveOriginsAnalysis> LiveOrigins; - std::unique_ptr<LoanPropagationAnalysis> LoanPropagation; -}; -} // namespace internal -} // namespace clang::lifetimes - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h deleted file mode 100644 index c4f5f0e..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h +++ /dev/null @@ -1,97 +0,0 @@ -//===- LiveOrigins.h - Live Origins Analysis -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the LiveOriginAnalysis, a backward dataflow analysis that -// determines which origins are "live" at each program point. An origin is -// "live" at a program point if there's a potential future use of a pointer it -// is associated with. Liveness is "generated" by a use of an origin (e.g., a -// `UseFact` from a read of a pointer) and is "killed" (i.e., it stops being -// live) when the origin is replaced by flowing a different origin into it -// (e.g., an OriginFlow from an assignment that kills the destination). -// -// This information is used for detecting use-after-free errors, as it allows us -// to check if a live origin holds a loan to an object that has already expired. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/Support/Debug.h" - -namespace clang::lifetimes::internal { - -enum class LivenessKind : uint8_t { - Dead, // Not alive - Maybe, // Live on some path but not all paths (may-be-live) - Must // Live on all paths (must-be-live) -}; - -/// Information about why an origin is live at a program point. -struct LivenessInfo { - /// The use that makes the origin live. If liveness is propagated from - /// multiple uses along different paths, this will point to the use appearing - /// earlier in the translation unit. - /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; - - /// The kind of liveness of the origin. - /// `Must`: The origin is live on all control-flow paths from the current - /// point to the function's exit (i.e. the current point is dominated by a set - /// of uses). 
- /// `Maybe`: indicates it is live on some but not all paths. - /// - /// This determines the diagnostic's confidence level. - /// `Must`-be-alive at expiration implies a definite use-after-free, - /// while `Maybe`-be-alive suggests a potential one on some paths. - LivenessKind Kind; - - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} - - bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; - } - bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); - IDBuilder.Add(Kind); - } -}; - -using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>; - -class LiveOriginsAnalysis { -public: - LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF); - ~LiveOriginsAnalysis(); - - /// Returns the set of origins that are live at a specific program point, - /// along with the the details of the liveness. - LivenessMap getLiveOriginsAt(ProgramPoint P) const; - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, - llvm::StringMap<ProgramPoint> TestPoints) const; - -private: - class Impl; - std::unique_ptr<Impl> PImpl; -}; - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h deleted file mode 100644 index 447d05c..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h +++ /dev/null @@ -1,48 +0,0 @@ -//===- LoanPropagation.h - Loan Propagation Analysis -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the LoanPropagationAnalysis, a forward dataflow analysis -// that tracks which loans each origin holds at each program point. Loans -// represent borrows of storage locations and are propagated through the -// program as pointers are copied or assigned. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" - -namespace clang::lifetimes::internal { - -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. 
-using LoanSet = llvm::ImmutableSet<LoanID>; -using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; - -class LoanPropagationAnalysis { -public: - LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory); - ~LoanPropagationAnalysis(); - - LoanSet getLoans(OriginID OID, ProgramPoint P) const; - -private: - class Impl; - std::unique_ptr<Impl> PImpl; -}; - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h deleted file mode 100644 index 7f5cf03..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h +++ /dev/null @@ -1,80 +0,0 @@ -//===- Loans.h - Loan and Access Path Definitions --------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the Loan and AccessPath structures, which represent -// borrows of storage locations, and the LoanManager, which manages the -// creation and retrieval of loans during lifetime analysis. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H - -#include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" -#include "llvm/Support/raw_ostream.h" - -namespace clang::lifetimes::internal { - -using LoanID = utils::ID<struct LoanTag>; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { - return OS << ID.Value; -} - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. - /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - /// The expression that creates the loan, e.g., &x. - const Expr *IssueExpr; - - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} - - void dump(llvm::raw_ostream &OS) const; -}; - -/// Manages the creation, storage and retrieval of loans. -class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. 
- llvm::SmallVector<Loan> AllLoans; -}; -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h deleted file mode 100644 index ba138b0..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ /dev/null @@ -1,91 +0,0 @@ -//===- Origins.h - Origin and Origin Management ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines Origins, which represent the set of possible loans a -// pointer-like object could hold, and the OriginManager, which manages the -// creation, storage, and retrieval of origins for variables and expressions. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H - -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" - -namespace clang::lifetimes::internal { - -using OriginID = utils::ID<struct OriginTag>; - -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { - return OS << ID.Value; -} - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast<const clang::ValueDecl *>(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast<const clang::Expr *>(); - } -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D); - Origin &addOrigin(OriginID ID, const clang::Expr &E); - - // TODO: Mark this method as const once we remove the call to getOrCreate. 
- OriginID get(const Expr &E); - - OriginID get(const ValueDecl &D); - - OriginID getOrCreate(const Expr &E); - - const Origin &getOrigin(OriginID ID) const; - - llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D); - - void dump(OriginID OID, llvm::raw_ostream &OS) const; - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector<Origin> AllOrigins; - llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; - llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; -}; -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h deleted file mode 100644 index 4183cab..0000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h +++ /dev/null @@ -1,118 +0,0 @@ -//===- Utils.h - Utility Functions for Lifetime Safety --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// This file provides utilities for the lifetime safety analysis, including -// join operations for LLVM's immutable data structures. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H - -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" - -namespace clang::lifetimes::internal::utils { - -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template <typename Tag> struct ID { - uint32_t Value = 0; - - bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } - bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } - bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } - ID<Tag> operator++(int) { - ID<Tag> Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -/// Computes the union of two ImmutableSets. -template <typename T> -static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, - llvm::ImmutableSet<T> B, - typename llvm::ImmutableSet<T>::Factory &F) { - if (A.getHeight() < B.getHeight()) - std::swap(A, B); - for (const T &E : B) - A = F.add(A, E); - return A; -} - -/// Describes the strategy for joining two `ImmutableMap` instances, primarily -/// differing in how they handle keys that are unique to one of the maps. -/// -/// A `Symmetric` join is universally correct, while an `Asymmetric` join -/// serves as a performance optimization. The latter is applicable only when the -/// join operation possesses a left identity element, allowing for a more -/// efficient, one-sided merge. -enum class JoinKind { - /// A symmetric join applies the `JoinValues` operation to keys unique to - /// either map, ensuring that values from both maps contribute to the result. - Symmetric, - /// An asymmetric join preserves keys unique to the first map as-is, while - /// applying the `JoinValues` operation only to keys unique to the second map. 
- Asymmetric, -}; - -/// Computes the key-wise union of two ImmutableMaps. -// TODO(opt): This key-wise join is a performance bottleneck. A more -// efficient merge could be implemented using a Patricia Trie or HAMT -// instead of the current AVL-tree-based ImmutableMap. -template <typename K, typename V, typename Joiner> -static llvm::ImmutableMap<K, V> -join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B, - typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues, - JoinKind Kind) { - if (A.getHeight() < B.getHeight()) - return join(B, A, F, JoinValues, Kind); - - // For each element in B, join it with the corresponding element in A - // (or with an empty value if it doesn't exist in A). - llvm::ImmutableMap<K, V> Res = A; - for (const auto &Entry : B) { - const K &Key = Entry.first; - const V &ValB = Entry.second; - Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); - } - if (Kind == JoinKind::Symmetric) { - for (const auto &Entry : A) { - const K &Key = Entry.first; - const V &ValA = Entry.second; - if (!B.contains(Key)) - Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); - } - } - return Res; -} -} // namespace clang::lifetimes::internal::utils - -namespace llvm { -template <typename Tag> -struct DenseMapInfo<clang::lifetimes::internal::utils::ID<Tag>> { - using ID = clang::lifetimes::internal::utils::ID<Tag>; - - static inline ID getEmptyKey() { - return {DenseMapInfo<uint32_t>::getEmptyKey()}; - } - - static inline ID getTombstoneKey() { - return {DenseMapInfo<uint32_t>::getTombstoneKey()}; - } - - static unsigned getHashValue(const ID &Val) { - return DenseMapInfo<uint32_t>::getHashValue(Val.Value); - } - - static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } -}; -} // namespace llvm - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 4b27a42..ef3f59f 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1400,7 +1400,7 @@ def C23 : DiagGroup<"c23-extensions", [VariadicMacroArgumentsOmitted]>; def : DiagGroup<"c2x-extensions", [C23]>; // A warning group for warnings about using C2y features as extensions. -def C2y : DiagGroup<"c2y-extensions">; +def C2y : DiagGroup<"c2y-extensions", [StaticInInline]>; // Previously supported warning group which is no longer pertinent as binary // literals are a C++14 and C23 extension now instead of a GNU extension. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5be63c0..3df28f2 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6326,11 +6326,14 @@ def warn_internal_linkage_local_storage : Warning< InGroup<IgnoredAttributes>; def ext_internal_in_extern_inline : ExtWarn< - "static %select{function|variable}0 %1 is used in an inline function with " - "external linkage">, InGroup<StaticInInline>; + "using static %select{function|variable}0 %1 in an inline function with " + "external linkage is a C2y extension">, InGroup<StaticInInline>; def ext_internal_in_extern_inline_quiet : Extension< - "static %select{function|variable}0 %1 is used in an inline function with " - "external linkage">, InGroup<StaticInInline>; + ext_internal_in_extern_inline.Summary>, InGroup<StaticInInline>; +def warn_c2y_compat_internal_in_extern_inline : Warning< + "using static %select{function|variable}0 %1 in an inline function with " + "external linkage is incompatible with standards before C2y">, + InGroup<CPre2yCompat>, DefaultIgnore; def warn_static_local_in_extern_inline : Warning< "non-constant static local variable in inline function may be different " "in different files">, InGroup<StaticLocalInInline>; diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 11ad61f..b4c24d7 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -451,6 +451,11 @@ public: return createBitcast(src, getPointerTo(newPointeeTy)); } + mlir::Value createPtrIsNull(mlir::Value ptr) { + mlir::Value nullPtr = getNullPtr(ptr.getType(), ptr.getLoc()); + return createCompare(ptr.getLoc(), cir::CmpOpKind::eq, ptr, nullPtr); + } + //===--------------------------------------------------------------------===// // Binary Operators //===--------------------------------------------------------------------===// @@ -644,6 +649,12 @@ public: return getI64IntegerAttr(size.getQuantity()); } + // Creates constant nullptr for pointer type ty. + cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) { + assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer()); + return cir::ConstantOp::create(*this, loc, getConstPtrAttr(ty, 0)); + } + /// Create a loop condition. 
cir::ConditionOp createCondition(mlir::Value condition) { return cir::ConditionOp::create(*this, condition.getLoc(), condition); diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index ace2086..df82ca1 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -85,6 +85,7 @@ struct MissingFeatures { static bool opFuncReadOnly() { return false; } static bool opFuncSection() { return false; } static bool opFuncWillReturn() { return false; } + static bool opFuncNoReturn() { return false; } static bool setLLVMFunctionFEnvAttributes() { return false; } static bool setFunctionAttributes() { return false; } @@ -256,6 +257,8 @@ struct MissingFeatures { static bool loopInfoStack() { return false; } static bool lowerAggregateLoadStore() { return false; } static bool lowerModeOptLevel() { return false; } + static bool loweringPrepareX86CXXABI() { return false; } + static bool loweringPrepareAArch64XXABI() { return false; } static bool maybeHandleStaticInExternC() { return false; } static bool mergeAllConstants() { return false; } static bool metaDataNode() { return false; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ec38231..c2f2ac5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -545,15 +545,16 @@ multiclass BoolFOption<string flag_base, KeyPathAndMacro kpm, Group<f_Group>; } -// Creates a BoolOption where both of the flags are prefixed with "g" and have -// the Group<g_Group>. +// Creates a BoolOption where both of the flags are prefixed with "g". +// Does *not* map to g_Group, because that is reserved for flags that are +// intended to enable (or disable) debug info, which is not appropriate for a +// negative boolean flag (-gno-${feature}). // Used for -cc1 frontend options. Driver-only options do not map to // CompilerInvocation. multiclass BoolGOption<string flag_base, KeyPathAndMacro kpm, Default default, FlagDef flag1, FlagDef flag2, BothFlags both = BothFlags<[]>> { - defm NAME : BoolOption<"g", flag_base, kpm, default, flag1, flag2, both>, - Group<g_Group>; + defm NAME : BoolOption<"g", flag_base, kpm, default, flag1, flag2, both>; } multiclass BoolMOption<string flag_base, KeyPathAndMacro kpm, @@ -4845,8 +4846,7 @@ defm structor_decl_linkage_names NegFlag<SetFalse>, PosFlag<SetTrue, [], [], "Attach linkage names to C++ constructor/destructor " - "declarations in DWARF." - "Implies -g.">, + "declarations in DWARF.">, BothFlags<[], [ClangOption, CLOption, CC1Option]>>, DocBrief<[{On some ABIs (e.g., Itanium), constructors and destructors may have multiple variants. Historically, when generating DWARF, Clang did not attach ``DW_AT_linkage_name`` to structor DIEs because there were multiple possible manglings (depending on the structor variant) that could be used. With ``-gstructor-decl-linkage-names``, for ABIs with structor variants, we attach a "unified" mangled name to structor declarations DIEs which debuggers can use to look up all the definitions for a structor declaration. E.g., a "unified" mangled name ``_ZN3FooC4Ev`` may have multiple definitions associated with it such as ``_ZN3FooC1Ev`` and ``_ZN3FooC2Ev``. 
@@ -4855,7 +4855,7 @@ defm key_instructions : BoolGOption<"key-instructions", CodeGenOpts<"DebugKeyInstructions">, DefaultFalse, NegFlag<SetFalse>, PosFlag<SetTrue, [], [], "Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code" - " in some debuggers. DWARF only. Implies -g.">, + " in some debuggers. DWARF only.">, BothFlags<[], [ClangOption, CLOption, CC1Option]>>; def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">; def help : Flag<["-", "--"], "help">, diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 09fdf75..6cadc34 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -911,6 +911,7 @@ public: ExprResult CheckReductionVar(OpenACCDirectiveKind DirectiveKind, OpenACCReductionOperator ReductionOp, Expr *VarExpr); + bool CheckReductionVarType(Expr *VarExpr); /// Called to check the 'var' type is a variable of pointer type, necessary /// for 'deviceptr' and 'attach' clauses. Returns true on success. diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index f899b3c..597cbd8 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -5290,6 +5290,33 @@ QualType ArraySectionExpr::getBaseOriginalType(const Expr *Base) { return OriginalTy; } +QualType ArraySectionExpr::getElementType() const { + QualType BaseTy = getBase()->IgnoreParenImpCasts()->getType(); + // We only have to look into the array section exprs, else we will get the + // type of the base, which should already be valid. + if (auto *ASE = dyn_cast<ArraySectionExpr>(getBase()->IgnoreParenImpCasts())) + BaseTy = ASE->getElementType(); + + if (BaseTy->isAnyPointerType()) + return BaseTy->getPointeeType(); + if (BaseTy->isArrayType()) + return BaseTy->castAsArrayTypeUnsafe()->getElementType(); + + // If this isn't a pointer or array, the base is a dependent expression, so + // just return the BaseTy anyway. + assert(BaseTy->isInstantiationDependentType()); + return BaseTy; +} + +QualType ArraySectionExpr::getBaseType() const { + // We only have to look into the array section exprs, else we will get the + // type of the base, which should already be valid. 
+ if (auto *ASE = dyn_cast<ArraySectionExpr>(getBase()->IgnoreParenImpCasts())) + return ASE->getElementType(); + + return getBase()->IgnoreParenImpCasts()->getType(); +} + RecoveryExpr::RecoveryExpr(ASTContext &Ctx, QualType T, SourceLocation BeginLoc, SourceLocation EndLoc, ArrayRef<Expr *> SubExprs) : Expr(RecoveryExprClass, T.getNonReferenceType(), diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 1dbd415..5a26f3e 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,8 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -49,4 +51,3 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) -add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeAnnotations.cpp index ad61a42..e791224 100644 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeAnnotations.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -13,7 +13,8 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" -namespace clang::lifetimes { +namespace clang { +namespace lifetimes { const FunctionDecl * getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { @@ -70,4 +71,5 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return isNormalAssignmentOperator(FD); } -} // namespace clang::lifetimes +} // namespace lifetimes +} // namespace clang diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp new file mode 100644 index 0000000..6196ec3 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -0,0 +1,1546 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include <cstdint> +#include <memory> +#include <optional> + +namespace clang::lifetimes { +namespace internal { + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. 
+/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is created. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet + LoanID ID; + AccessPath Path; + /// The expression that creates the loan, e.g., &x. + const Expr *IssueExpr; + + Loan(LoanID id, AccessPath path, const Expr *IssueExpr) + : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; + } +}; + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast<const clang::ValueDecl *>(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast<const clang::Expr *>(); + } +}; + +/// Manages the creation, storage and retrieval of loans. +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { + AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector<Loan> AllLoans; +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); + } + Origin &addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); + } + + // TODO: Mark this method as const once we remove the call to getOrCreate. + OriginID get(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. 
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); + } + + OriginID get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; + } + + OriginID getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; + } + + const Origin &getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; + } + + llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; + } + + void dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; + } + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector<Origin> AllOrigins; + llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; + llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; +}; + +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, + /// An origin escapes the function by flowing into the return value. + ReturnOfOrigin, + /// An origin is used (eg. appears as l-value expression like DeclRefExpr). + Use, + /// A marker for a specific point in the code, for testing. 
+ TestPoint, + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template <typename T> const T *getAs() const { + if (T::classof(this)) + return static_cast<const T *>(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; + } +}; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; + } +}; + +class ExpireFact : public Fact { + LoanID LID; + SourceLocation ExpiryLoc; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; + } +}; + +class OriginFlowFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. + bool KillDest; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginFlow; + } + + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; + } +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; + } +}; + +class UseFact : public Fact { + const Expr *UseExpr; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} + + OriginID getUsedOrigin(const OriginManager &OM) const { + // TODO: Remove const cast and make OriginManager::get as const. 
+ return const_cast<OriginManager &>(OM).get(*UseExpr); + } + const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { + OS << "Use ("; + OM.dump(getUsedOrigin(OM), OS); + OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; + } +}; + +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. +class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; + } +}; + +class FactManager { +public: + llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { + if (!NewFacts.empty()) + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + + template <typename FactType, typename... Args> + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate<FactType>(); + return new (Mem) FactType(std::forward<Args>(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast<NamedDecl>(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. + for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + } + llvm::dbgs() << " End of Block\n"; + } + } + + LoanManager &getLoanMgr() { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; + +class FactGenerator : public ConstStmtVisitor<FactGenerator> { + using Base = ConstStmtVisitor<FactGenerator>; + +public: + FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. 
+ for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) + Visit(CS->getStmt()); + else if (std::optional<CFGAutomaticObjDtor> DtorOpt = + Element.getAs<CFGAutomaticObjDtor>()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast<VarDecl>(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); + } + + void VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact<IssueFact>(L->ID, ExprOID)); + } + } + } + + void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } + } + + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa<CXXConversionDecl>(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector<const Expr *, 4> Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } + } + + void VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); + } + + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); + } + + void VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. 
+ if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + killAndFlowOrigin(*UO, *SubExpr); + } + } + + void VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back( + FactMgr.createFact<ReturnOfOriginFact>(OID)); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); + } + + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); + } + + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); + } + + void VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); + } + + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + } + } + +private: + static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr<PointerAttr>(); + return RD->hasAttr<PointerAttr>(); + } + return false; + } + + static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); + } + // Check if a type has an origin. 
+ static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); + } + + static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); + } + + void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); + } + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef<const Expr *> Args, + bool IsGslConstruction = false) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast<CXXMethodDecl>(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } + } + + /// Creates a loan for the storage path of a given declaration reference. + /// This function should be called whenever a DeclRefExpr represents a borrow. + /// \param DRE The declaration reference expression that initiates the borrow. + /// \return The new Loan on success, nullptr otherwise. + const Loan *createLoan(const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; + } + + template <typename Destination, typename Source> + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template <typename Destination, typename Source> + void killAndFlowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact<OriginFlowFact>(DestOID, SrcOID, /*KillDest=*/true)); + } + + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. 
+ bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact<TestPointFact>(Annotation)); + return true; + } + } + return false; + } + + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } + } + + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). + void handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact<UseFact>(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } + } + + void markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); + } + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector<Fact *> CurrentBlockFacts; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. + llvm::DenseMap<const DeclRefExpr *, UseFact *> UseFacts; +}; + +// ========================================================================= // +// Generic Dataflow Analysis +// ========================================================================= // + +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. 
+/// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template <typename Derived, typename LatticeType, Direction Dir> +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis<Derived, Lattice, Dir>; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap<const CFGBlock *, Lattice> InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap<const CFGBlock *, Lattice> OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &AllFacts; + + explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F) + : Cfg(C), AC(AC), AllFacts(F) {} + +public: + void run() { + Derived &D = static_cast<Derived &>(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + using Worklist = + std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist, + BackwardDataflowWorklist>; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional<Lattice> OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + + std::optional<Lattice> getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast<const Derived *>(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = AllFacts.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. 
+ PerPointStates[F] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast<Derived *>(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs<IssueFact>()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs<ExpireFact>()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs<OriginFlowFact>()); + case Fact::Kind::ReturnOfOrigin: + return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs<UseFact>()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs<TestPointFact>()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; + +namespace utils { + +/// Computes the union of two ImmutableSets. +template <typename T> +static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, + llvm::ImmutableSet<T> B, + typename llvm::ImmutableSet<T>::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. +template <typename K, typename V, typename Joiner> +static llvm::ImmutableMap<K, V> +join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B, + typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues, + JoinKind Kind) { + if (A.getHeight() < B.getHeight()) + return join(B, A, F, JoinValues, Kind); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). 
+ llvm::ImmutableMap<K, V> Res = A; + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } + } + return Res; +} +} // namespace utils + +// ========================================================================= // +// Loan Propagation Analysis +// ========================================================================= // + +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct LoanPropagationLattice { + /// The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} + LoanPropagationLattice() = default; + + bool operator==(const LoanPropagationLattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const LoanPropagationLattice &Other) const { + return !(*this == Other); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "LoanPropagationLattice State:\n"; + if (Origins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +/// The analysis that tracks which loans belong to which origins. +class LoanPropagationAnalysis + : public DataflowAnalysis<LoanPropagationAnalysis, LoanPropagationLattice, + Direction::Forward> { + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; + +public: + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "LoanPropagation"; } + + Lattice getInitialState() { return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); + return Lattice(JoinedOrigins); + } + + /// A new loan is issued to the origin. Old loans are erased. 
+ Lattice transfer(Lattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return LoanPropagationLattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + } + + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); + LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + + return LoanPropagationLattice( + OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + } + + LoanSet getLoans(OriginID OID, ProgramPoint P) const { + return getLoans(getState(P), OID); + } + +private: + LoanSet getLoans(Lattice L, OriginID OID) const { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return LoanSetFactory.getEmptySet(); + } +}; + +// ========================================================================= // +// Live Origins Analysis +// ========================================================================= // +// +// A backward dataflow analysis that determines which origins are "live" at each +// program point. An origin is "live" at a program point if there's a potential +// future use of the pointer it represents. Liveness is "generated" by a read of +// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being +// live) when its loan set is overwritten (e.g. a OriginFlow killing the +// destination origin). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. +// ========================================================================= // + +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + const UseFact *CausingUseFact; + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). + /// `Maybe`: indicates it is live on some but not all paths. + /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(const UseFact *UF, LivenessKind K) + : CausingUseFact(UF), Kind(K) {} + + bool operator==(const LivenessInfo &Other) const { + return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } + + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingUseFact); + IDBuilder.Add(Kind); + } +}; + +using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>; + +/// The dataflow lattice for origin liveness analysis. 
+/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct LivenessLattice { + LivenessMap LiveOrigins; + + LivenessLattice() : LiveOrigins(nullptr) {}; + + explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const LivenessLattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const LivenessLattice &Other) const { + return !(*this == Other); + } + + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } + } +}; + +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. +class LiveOriginAnalysis + : public DataflowAnalysis<LiveOriginAnalysis, LivenessLattice, + Direction::Backward> { + FactManager &FactMgr; + LivenessMap::Factory &Factory; + +public: + LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis<LiveOriginAnalysis, Lattice, + Direction::Backward>::transfer; + + StringRef getAnalysisName() const { return "LiveOrigins"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } + + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. 
A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); + } + + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise, it propagates liveness from destination to source. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOrigins(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : LSA.getTestPoints()) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } +}; + +// ========================================================================= // +// Lifetime checker and Error reporter +// ========================================================================= // + +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap; + LoanPropagationAnalysis &LoanPropagation; + LiveOriginAnalysis &LiveOrigins; + FactManager &FactMgr; + AnalysisDeclContext &ADC; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, + FactManager &FM, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) + : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), + Reporter(Reporter) {} + + void run() { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs<ExpireFact>()) + checkExpiry(EF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). 
+ void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } + } + if (!BadUse) + return; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; + +// ========================================================================= // +// LifetimeSafetyAnalysis Class Implementation +// ========================================================================= // + +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + llvm::BumpPtrAllocator Allocator; + OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; +}; + +// We need this here for unique_ptr with forward declared class. +LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter), Factory(std::make_unique<LifetimeFactory>()), + FactMgr(std::make_unique<FactManager>()) {} + +void LifetimeSafetyAnalysis::run() { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + + FactGenerator FactGen(*FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. 
+ LoanPropagation = std::make_unique<LoanPropagationAnalysis>( + Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); + LoanPropagation->run(); + + LiveOrigins = std::make_unique<LiveOriginAnalysis>( + Cfg, AC, *FactMgr, Factory->LivenessMapFactory); + LiveOrigins->run(); + DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); + + LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, + Reporter); + Checker.run(); +} + +LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, + ProgramPoint PP) const { + assert(LoanPropagation && "Analysis has not been run."); + return LoanPropagation->getLoans(OID, PP); +} + +std::optional<OriginID> +LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { + assert(FactMgr && "FactManager not initialized"); + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` logic + // in a const-query context if that becomes an issue. + return FactMgr->getOriginMgr().get(*D); +} + +std::vector<LoanID> +LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { + assert(FactMgr && "FactManager not initialized"); + std::vector<LoanID> Result; + for (const Loan &L : FactMgr->getLoanMgr().getLoans()) + if (L.Path.D == VD) + Result.push_back(L.ID); + return Result; +} + +std::vector<std::pair<OriginID, LivenessKind>> +LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { + assert(LiveOrigins && "LiveOriginAnalysis has not been run."); + std::vector<std::pair<OriginID, LivenessKind>> Result; + for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) + Result.push_back({OID, Info.Kind}); + return Result; +} + +llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const { + assert(FactMgr && "FactManager not initialized"); + llvm::StringMap<ProgramPoint> AnnotationToPointMap; + for (const CFGBlock *Block : *AC.getCFG()) { + for (const Fact *F : FactMgr->getFacts(Block)) { + if (const auto *TPF = F->getAs<TestPointFact>()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); + Analysis.run(); +} +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt deleted file mode 100644 index 8584292..0000000 --- a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_clang_library(clangAnalysisLifetimeSafety - Checker.cpp - Facts.cpp - FactsGenerator.cpp - LifetimeAnnotations.cpp - LifetimeSafety.cpp - LiveOrigins.cpp - Loans.cpp - LoanPropagation.cpp - Origins.cpp - ) - diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp deleted file mode 100644 index c443c3a..0000000 --- a/clang/lib/Analysis/LifetimeSafety/Checker.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the LifetimeChecker, which detects use-after-free -// errors by checking if live origins hold loans that have expired. -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" -#include "clang/AST/Expr.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" - -namespace clang::lifetimes::internal { - -static Confidence livenessKindToConfidence(LivenessKind K) { - switch (K) { - case LivenessKind::Must: - return Confidence::Definite; - case LivenessKind::Maybe: - return Confidence::Maybe; - case LivenessKind::Dead: - return Confidence::None; - } - llvm_unreachable("unknown liveness kind"); -} - -namespace { - -/// Struct to store the complete context for a potential lifetime violation. -struct PendingWarning { - SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. - Confidence ConfidenceLevel; -}; - -class LifetimeChecker { -private: - llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap; - const LoanPropagationAnalysis &LoanPropagation; - const LiveOriginsAnalysis &LiveOrigins; - const FactManager &FactMgr; - LifetimeSafetyReporter *Reporter; - -public: - LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, - const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, - AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) - : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), - Reporter(Reporter) { - for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>()) - for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *EF = F->getAs<ExpireFact>()) - checkExpiry(EF); - issuePendingWarnings(); - } - - /// Checks for use-after-free errors when a loan expires. - /// - /// This method examines all live origins at the expiry point and determines - /// if any of them hold the expiring loan. If so, it creates a pending - /// warning with the appropriate confidence level based on the liveness - /// information. The confidence reflects whether the origin is definitely - /// or maybe live at this point. - /// - /// Note: This implementation considers only the confidence of origin - /// liveness. Future enhancements could also consider the confidence of loan - /// propagation (e.g., a loan may only be held on some execution paths). - void checkExpiry(const ExpireFact *EF) { - LoanID ExpiredLoan = EF->getLoanID(); - LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); - Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; - for (auto &[OID, LiveInfo] : Origins) { - LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); - if (!HeldLoans.contains(ExpiredLoan)) - continue; - // Loan is defaulted. 
- Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); - if (CurConfidence < NewConfidence) { - CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; - } - } - if (!BadUse) - return; - // We have a use-after-free. - Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; - if (LastConf >= CurConfidence) - return; - FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), - /*ConfidenceLevel=*/CurConfidence}; - } - - void issuePendingWarnings() { - if (!Reporter) - return; - for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); - } - } -}; -} // namespace - -void runLifetimeChecker(const LoanPropagationAnalysis &LP, - const LiveOriginsAnalysis &LO, - const FactManager &FactMgr, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter) { - llvm::TimeTraceScope TimeProfile("LifetimeChecker"); - LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h deleted file mode 100644 index 2f7bcb6..0000000 --- a/clang/lib/Analysis/LifetimeSafety/Dataflow.h +++ /dev/null @@ -1,188 +0,0 @@ -//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a generic, policy-based driver for dataflow analyses. -// It provides a flexible framework that combines the dataflow runner and -// transfer functions, allowing derived classes to implement specific analyses -// by defining their lattice, join, and transfer functions. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include <optional> - -namespace clang::lifetimes::internal { - -enum class Direction { Forward, Backward }; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// A generic, policy-based driver for dataflow analyses. It combines -/// the dataflow runner and the transferer logic into a single class hierarchy. -/// -/// The derived class is expected to provide: -/// - A `Lattice` type. -/// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state of the analysis. -/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. 
-/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single -/// lifetime-relevant `Fact` transforms the lattice state. Only overloads -/// for facts relevant to the analysis need to be implemented. -/// -/// \tparam Derived The CRTP derived class that implements the specific -/// analysis. -/// \tparam LatticeType The dataflow lattice used by the analysis. -/// \tparam Dir The direction of the analysis (Forward or Backward). -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -template <typename Derived, typename LatticeType, Direction Dir> -class DataflowAnalysis { -public: - using Lattice = LatticeType; - using Base = DataflowAnalysis<Derived, Lattice, Dir>; - -private: - const CFG &Cfg; - AnalysisDeclContext &AC; - - /// The dataflow state before a basic block is processed. - llvm::DenseMap<const CFGBlock *, Lattice> InStates; - /// The dataflow state after a basic block is processed. - llvm::DenseMap<const CFGBlock *, Lattice> OutStates; - /// The dataflow state at a Program Point. - /// In a forward analysis, this is the state after the Fact at that point has - /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; - - static constexpr bool isForward() { return Dir == Direction::Forward; } - -protected: - FactManager &FactMgr; - - explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, - FactManager &FactMgr) - : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} - -public: - void run() { - Derived &D = static_cast<Derived &>(*this); - llvm::TimeTraceScope Time(D.getAnalysisName()); - - using Worklist = - std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist, - BackwardDataflowWorklist>; - Worklist W(Cfg, AC); - - const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); - InStates[Start] = D.getInitialState(); - W.enqueueBlock(Start); - - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = *getInState(B); - Lattice StateOut = transferBlock(B, StateIn); - OutStates[B] = StateOut; - for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { - if (!AdjacentB) - continue; - std::optional<Lattice> OldInState = getInState(AdjacentB); - Lattice NewInState = - !OldInState ? StateOut : D.join(*OldInState, StateOut); - // Enqueue the adjacent block if its in-state has changed or if we have - // never seen it. - if (!OldInState || NewInState != *OldInState) { - InStates[AdjacentB] = NewInState; - W.enqueueBlock(AdjacentB); - } - } - } - } - -protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - - std::optional<Lattice> getInState(const CFGBlock *B) const { - auto It = InStates.find(B); - if (It == InStates.end()) - return std::nullopt; - return It->second; - } - - Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } - - void dump() const { - const Derived *D = static_cast<const Derived *>(this); - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << D->getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); - getOutState(&B).dump(llvm::dbgs()); - } - -private: - /// Computes the state at one end of a block by applying all its facts - /// sequentially to a given state from the other end. 
- Lattice transferBlock(const CFGBlock *Block, Lattice State) { - auto Facts = FactMgr.getFacts(Block); - if constexpr (isForward()) { - for (const Fact *F : Facts) { - State = transferFact(State, F); - PerPointStates[F] = State; - } - } else { - for (const Fact *F : llvm::reverse(Facts)) { - // In backward analysis, capture the state before applying the fact. - PerPointStates[F] = State; - State = transferFact(State, F); - } - } - return State; - } - - Lattice transferFact(Lattice In, const Fact *F) { - assert(F); - Derived *D = static_cast<Derived *>(this); - switch (F->getKind()) { - case Fact::Kind::Issue: - return D->transfer(In, *F->getAs<IssueFact>()); - case Fact::Kind::Expire: - return D->transfer(In, *F->getAs<ExpireFact>()); - case Fact::Kind::OriginFlow: - return D->transfer(In, *F->getAs<OriginFlowFact>()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); - case Fact::Kind::Use: - return D->transfer(In, *F->getAs<UseFact>()); - case Fact::Kind::TestPoint: - return D->transfer(In, *F->getAs<TestPointFact>()); - } - llvm_unreachable("Unknown fact kind"); - } - -public: - Lattice transfer(Lattice In, const IssueFact &) { return In; } - Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } - Lattice transfer(Lattice In, const UseFact &) { return In; } - Lattice transfer(Lattice In, const TestPointFact &) { return In; } -}; -} // namespace clang::lifetimes::internal -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp deleted file mode 100644 index 1aea64f..0000000 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ /dev/null @@ -1,102 +0,0 @@ -//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" - -namespace clang::lifetimes::internal { - -void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; -} - -void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const { - OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ", ToOrigin: "; - OM.dump(getOriginID(), OS); - OS << ")\n"; -} - -void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const { - OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ")\n"; -} - -void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "OriginFlow (Dest: "; - OM.dump(getDestOriginID(), OS); - OS << ", Src: "; - OM.dump(getSrcOriginID(), OS); - OS << (getKillDest() ? 
"" : ", Merge"); - OS << ")\n"; -} - -void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); - OS << ")\n"; -} - -void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); - OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; -} - -void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; -} - -llvm::StringMap<ProgramPoint> FactManager::getTestPoints() const { - llvm::StringMap<ProgramPoint> AnnotationToPointMap; - for (const CFGBlock *Block : BlockToFactsMap.keys()) { - for (const Fact *F : getFacts(Block)) { - if (const auto *TPF = F->getAs<TestPointFact>()) { - StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && - "more than one test points with the same name"); - AnnotationToPointMap[PointName] = F; - } - } - } - return AnnotationToPointMap; -} - -void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast<NamedDecl>(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. - for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } - } - llvm::dbgs() << " End of Block\n"; - } -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp deleted file mode 100644 index 485308f..0000000 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ /dev/null @@ -1,348 +0,0 @@ -//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "llvm/Support/TimeProfiler.h" - -namespace clang::lifetimes::internal { - -static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. - if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr<PointerAttr>(); - return RD->hasAttr<PointerAttr>(); - } - return false; -} - -static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType() || isGslPointerType(QT); -} -// Check if a type has an origin. 
-static bool hasOrigin(const Expr *E) { - return E->isGLValue() || isPointerType(E->getType()); -} - -static bool hasOrigin(const VarDecl *VD) { - return isPointerType(VD->getType()); -} - -/// Creates a loan for the storage path of a given declaration reference. -/// This function should be called whenever a DeclRefExpr represents a borrow. -/// \param DRE The declaration reference expression that initiates the borrow. -/// \return The new Loan on success, nullptr otherwise. -static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { - if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { - AccessPath Path(VD); - // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); - } - return nullptr; -} - -void FactsGenerator::run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. - for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) - Visit(CS->getStmt()); - else if (std::optional<CFGAutomaticObjDtor> DtorOpt = - Element.getAs<CFGAutomaticObjDtor>()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } -} - -void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast<VarDecl>(D)) - if (hasOrigin(VD)) - if (const Expr *InitExpr = VD->getInit()) - killAndFlowOrigin(*VD, *InitExpr); -} - -void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { - handleUse(DRE); - // For non-pointer/non-view types, a reference to the variable's storage - // is a borrow. We create a loan for it. - // For pointer/view types, we stick to the existing model for now and do - // not create an extra origin for the l-value expression itself. - - // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is - // not sufficient to model the different levels of indirection. The current - // single-origin model cannot distinguish between a loan to the variable's - // storage and a loan to what it points to. A multi-origin model would be - // required for this. - if (!isPointerType(DRE->getType())) { - if (const Loan *L = createLoan(FactMgr, DRE)) { - OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); - CurrentBlockFacts.push_back( - FactMgr.createFact<IssueFact>(L->ID, ExprOID)); - } - } -} - -void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { - if (isGslPointerType(CCE->getType())) { - handleGSLPointerConstruction(CCE); - return; - } -} - -void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { - // Specifically for conversion operators, - // like `std::string_view p = std::string{};` - if (isGslPointerType(MCE->getType()) && - isa<CXXConversionDecl>(MCE->getCalleeDecl())) { - // The argument is the implicit object itself. - handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}, - /*IsGslConstruction=*/true); - } - if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { - // Construct the argument list, with the implicit 'this' object as the - // first argument. 
- llvm::SmallVector<const Expr *, 4> Args; - Args.push_back(MCE->getImplicitObjectArgument()); - Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); - - handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); - } -} - -void FactsGenerator::VisitCallExpr(const CallExpr *CE) { - handleFunctionCall(CE, CE->getDirectCallee(), - {CE->getArgs(), CE->getNumArgs()}); -} - -void FactsGenerator::VisitCXXNullPtrLiteralExpr( - const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. - FactMgr.getOriginMgr().getOrCreate(*N); -} - -void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE)) - return; - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). - killAndFlowOrigin(*ICE, *ICE->getSubExpr()); -} - -void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - // Taking address of a pointer-type expression is not yet supported and - // will be supported in multi-origin model. - if (isPointerType(SubExpr->getType())) - return; - // The origin of an address-of expression (e.g., &x) is the origin of - // its sub-expression (x). This fact will cause the dataflow analysis - // to propagate any loans held by the sub-expression's origin to the - // origin of this UnaryOperator expression. - killAndFlowOrigin(*UO, *SubExpr); - } -} - -void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr)) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back(FactMgr.createFact<ReturnOfOriginFact>(OID)); - } - } -} - -void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) - handleAssignment(BO->getLHS(), BO->getRHS()); -} - -void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - // Assignment operators have special "kill-then-propagate" semantics - // and are handled separately. - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { - handleAssignment(OCE->getArg(0), OCE->getArg(1)); - return; - } - handleFunctionCall(OCE, OCE->getDirectCallee(), - {OCE->getArgs(), OCE->getNumArgs()}, - /*IsGslConstruction=*/false); -} - -void FactsGenerator::VisitCXXFunctionalCastExpr( - const CXXFunctionalCastExpr *FCE) { - // Check if this is a test point marker. If so, we are done with this - // expression. - if (handleTestPoint(FCE)) - return; - if (isGslPointerType(FCE->getType())) - killAndFlowOrigin(*FCE, *FCE->getSubExpr()); -} - -void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { - if (!hasOrigin(ILE)) - return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) - killAndFlowOrigin(*ILE, *ILE->getInit(0)); -} - -void FactsGenerator::VisitMaterializeTemporaryExpr( - const MaterializeTemporaryExpr *MTE) { - if (!hasOrigin(MTE)) - return; - // A temporary object's origin is the same as the origin of the - // expression that initializes it. 
- killAndFlowOrigin(*MTE, *MTE->getSubExpr()); -} - -void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); - } -} - -void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { - assert(isGslPointerType(CCE->getType())); - if (CCE->getNumArgs() != 1) - return; - if (hasOrigin(CCE->getArg(0))) - killAndFlowOrigin(*CCE, *CCE->getArg(0)); - else - // This could be a new borrow. - handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}, - /*IsGslConstruction=*/true); -} - -/// Checks if a call-like expression creates a borrow by passing a value to a -/// reference parameter, creating an IssueFact if it does. -/// \param IsGslConstruction True if this is a GSL construction where all -/// argument origins should flow to the returned origin. -void FactsGenerator::handleFunctionCall(const Expr *Call, - const FunctionDecl *FD, - ArrayRef<const Expr *> Args, - bool IsGslConstruction) { - // Ignore functions returning values with no origin. - if (!FD || !hasOrigin(Call)) - return; - auto IsArgLifetimeBound = [FD](unsigned I) -> bool { - const ParmVarDecl *PVD = nullptr; - if (const auto *Method = dyn_cast<CXXMethodDecl>(FD); - Method && Method->isInstance()) { - if (I == 0) - // For the 'this' argument, the attribute is on the method itself. - return implicitObjectParamIsLifetimeBound(Method); - if ((I - 1) < Method->getNumParams()) - // For explicit arguments, find the corresponding parameter - // declaration. - PVD = Method->getParamDecl(I - 1); - } else if (I < FD->getNumParams()) - // For free functions or static methods. - PVD = FD->getParamDecl(I); - return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false; - }; - if (Args.empty()) - return; - bool killedSrc = false; - for (unsigned I = 0; I < Args.size(); ++I) - if (IsGslConstruction || IsArgLifetimeBound(I)) { - if (!killedSrc) { - killedSrc = true; - killAndFlowOrigin(*Call, *Args[I]); - } else - flowOrigin(*Call, *Args[I]); - } -} - -/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. -/// If so, creates a `TestPointFact` and returns true. 
-bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { - if (!FCE->getType()->isVoidType()) - return false; - - const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { - llvm::StringRef LiteralValue = SL->getString(); - const std::string Prefix = "__lifetime_test_point_"; - - if (LiteralValue.starts_with(Prefix)) { - StringRef Annotation = LiteralValue.drop_front(Prefix.length()); - CurrentBlockFacts.push_back( - FactMgr.createFact<TestPointFact>(Annotation)); - return true; - } - } - return false; -} - -void FactsGenerator::handleAssignment(const Expr *LHSExpr, - const Expr *RHSExpr) { - if (!hasOrigin(LHSExpr)) - return; - // Find the underlying variable declaration for the left-hand side. - if (const auto *DRE_LHS = - dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { - markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) { - // Kill the old loans of the destination origin and flow the new loans - // from the source origin. - killAndFlowOrigin(*VD_LHS, *RHSExpr); - } - } -} - -// A DeclRefExpr will be treated as a use of the referenced decl. It will be -// checked for use-after-free unless it is later marked as being written to -// (e.g. on the left-hand side of an assignment). -void FactsGenerator::handleUse(const DeclRefExpr *DRE) { - if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact<UseFact>(DRE); - CurrentBlockFacts.push_back(UF); - assert(!UseFacts.contains(DRE)); - UseFacts[DRE] = UF; - } -} - -void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { - if (!isPointerType(DRE->getType())) - return; - assert(UseFacts.contains(DRE)); - UseFacts[DRE]->markAsWritten(); -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp deleted file mode 100644 index 00c7ed90..0000000 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the main LifetimeSafetyAnalysis class, which coordinates -// the various components (fact generation, loan propagation, live origins -// analysis, and checking) to detect lifetime safety violations in C++ code. 
-// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include <memory> - -namespace clang::lifetimes { -namespace internal { - -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) - : AC(AC), Reporter(Reporter) {} - -void LifetimeSafetyAnalysis::run() { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - - const CFG &Cfg = *AC.getCFG(); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - - FactsGenerator FactGen(FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - /// 3. Collapse ExpireFacts belonging to same source location into a single - /// Fact. - LoanPropagation = std::make_unique<LoanPropagationAnalysis>( - Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory); - - LiveOrigins = std::make_unique<LiveOriginsAnalysis>( - Cfg, AC, FactMgr, Factory.LivenessMapFactory); - DEBUG_WITH_TYPE("LiveOrigins", - LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints())); - - runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter); -} -} // namespace internal - -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); -} -} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp deleted file mode 100644 index cddb3f3c..0000000 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ /dev/null @@ -1,180 +0,0 @@ -//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "Dataflow.h" -#include "llvm/Support/ErrorHandling.h" - -namespace clang::lifetimes::internal { -namespace { - -/// The dataflow lattice for origin liveness analysis. 
-/// It tracks which origins are live, why they're live (which UseFact), -/// and the confidence level of that liveness. -struct Lattice { - LivenessMap LiveOrigins; - - Lattice() : LiveOrigins(nullptr) {}; - - explicit Lattice(LivenessMap L) : LiveOrigins(L) {} - - bool operator==(const Lattice &Other) const { - return LiveOrigins == Other.LiveOrigins; - } - - bool operator!=(const Lattice &Other) const { return !(*this == Other); } - - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { - if (LiveOrigins.isEmpty()) - OS << " <empty>\n"; - for (const auto &Entry : LiveOrigins) { - OriginID OID = Entry.first; - const LivenessInfo &Info = Entry.second; - OS << " "; - OM.dump(OID, OS); - OS << " is "; - switch (Info.Kind) { - case LivenessKind::Must: - OS << "definitely"; - break; - case LivenessKind::Maybe: - OS << "maybe"; - break; - case LivenessKind::Dead: - llvm_unreachable("liveness kind of live origins should not be dead."); - } - OS << " live at this point\n"; - } - } -}; - -/// The analysis that tracks which origins are live, with granular information -/// about the causing use fact and confidence level. This is a backward -/// analysis. -class AnalysisImpl - : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward> { - -public: - AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF) - : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} - using DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward>::transfer; - - StringRef getAnalysisName() const { return "LiveOrigins"; } - - Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - - /// Merges two lattices by combining liveness information. - /// When the same origin has different confidence levels, we take the lower - /// one. - Lattice join(Lattice L1, Lattice L2) const { - LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. - auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; - }; - auto CombineLivenessKind = [](LivenessKind K1, - LivenessKind K2) -> LivenessKind { - assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); - assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); - // Only return "Must" if both paths are "Must", otherwise Maybe. - if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) - return LivenessKind::Must; - return LivenessKind::Maybe; - }; - auto CombineLivenessInfo = [&](const LivenessInfo *L1, - const LivenessInfo *L2) -> LivenessInfo { - assert((L1 || L2) && "unexpectedly merging 2 empty sets"); - if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); - if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); - }; - return Lattice(utils::join( - L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, - // A symmetric join is required here. If an origin is live on one - // branch but not the other, its confidence must be demoted to `Maybe`. - utils::JoinKind::Symmetric)); - } - - /// A read operation makes the origin live with definite confidence, as it - /// dominates this program point. A write operation kills the liveness of - /// the origin since it overwrites the value. 
- Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); - // Write kills liveness. - if (UF.isWritten()) - return Lattice(Factory.remove(In.LiveOrigins, OID)); - // Read makes origin live with definite confidence (dominates this point). - return Lattice(Factory.add(In.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } - - /// Issuing a new loan to an origin kills its liveness. - Lattice transfer(Lattice In, const IssueFact &IF) { - return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); - } - - /// An OriginFlow kills the liveness of the destination origin if `KillDest` - /// is true. Otherwise, it propagates liveness from destination to source. - Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); - } - - LivenessMap getLiveOriginsAt(ProgramPoint P) const { - return getState(P).LiveOrigins; - } - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, - llvm::StringMap<ProgramPoint> TestPoints) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - for (const auto &Entry : TestPoints) { - OS << "TestPoint: " << Entry.getKey() << "\n"; - getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); - } - } - -private: - FactManager &FactMgr; - LivenessMap::Factory &Factory; -}; -} // namespace - -// PImpl wrapper implementation -class LiveOriginsAnalysis::Impl : public AnalysisImpl { - using AnalysisImpl::AnalysisImpl; -}; - -LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, - FactManager &F, - LivenessMap::Factory &SF) - : PImpl(std::make_unique<Impl>(C, AC, F, SF)) { - PImpl->run(); -} - -LiveOriginsAnalysis::~LiveOriginsAnalysis() = default; - -LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const { - return PImpl->getLiveOriginsAt(P); -} - -void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS, - llvm::StringMap<ProgramPoint> TestPoints) const { - PImpl->dump(OS, TestPoints); -} -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp deleted file mode 100644 index 387097e..0000000 --- a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "Dataflow.h" -#include <memory> - -namespace clang::lifetimes::internal { -namespace { -/// Represents the dataflow lattice for loan propagation. -/// -/// This lattice tracks which loans each origin may hold at a given program -/// point.The lattice has a finite height: An origin's loan set is bounded by -/// the total number of loans in the function. -/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. 
-struct Lattice { - /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit Lattice(const OriginLoanMap &S) : Origins(S) {} - Lattice() = default; - - bool operator==(const Lattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const Lattice &Other) const { return !(*this == Other); } - - void dump(llvm::raw_ostream &OS) const { - OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) - OS << " <empty>\n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -class AnalysisImpl - : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Forward> { -public: - AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} - - using Base::transfer; - - StringRef getAnalysisName() const { return "LoanPropagation"; } - - Lattice getInitialState() { return Lattice{}; } - - /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. - Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, - [&](const LoanSet *S1, const LoanSet *S2) { - assert((S1 || S2) && "unexpectedly merging 2 empty sets"); - if (!S1) - return *S2; - if (!S2) - return *S1; - return utils::join(*S1, *S2, LoanSetFactory); - }, - // Asymmetric join is a performance win. For origins present only on one - // branch, the loan set can be carried over as-is. - utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); - } - - /// A new loan is issued to the origin. Old loans are erased. - Lattice transfer(Lattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return Lattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); - } - - /// A flow from source to destination. If `KillDest` is true, this replaces - /// the destination's loans with the source's. Otherwise, the source's loans - /// are merged into the destination's. - Lattice transfer(Lattice In, const OriginFlowFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - - LoanSet DestLoans = - F.getKillDest() ? 
LoanSetFactory.getEmptySet() : getLoans(In, DestOID); - LoanSet SrcLoans = getLoans(In, SrcOID); - LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - - return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); - } - - LoanSet getLoans(OriginID OID, ProgramPoint P) const { - return getLoans(getState(P), OID); - } - -private: - LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) - return *Loans; - return LoanSetFactory.getEmptySet(); - } - - OriginLoanMap::Factory &OriginLoanMapFactory; - LoanSet::Factory &LoanSetFactory; -}; -} // namespace - -class LoanPropagationAnalysis::Impl final : public AnalysisImpl { - using AnalysisImpl::AnalysisImpl; -}; - -LoanPropagationAnalysis::LoanPropagationAnalysis( - const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : PImpl(std::make_unique<Impl>(C, AC, F, OriginLoanMapFactory, - LoanSetFactory)) { - PImpl->run(); -} - -LoanPropagationAnalysis::~LoanPropagationAnalysis() = default; - -LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const { - return PImpl->getLoans(OID, P); -} -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp deleted file mode 100644 index 2c85a3c..0000000 --- a/clang/lib/Analysis/LifetimeSafety/Loans.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" - -namespace clang::lifetimes::internal { - -void Loan::dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; - OS << Path.D->getNameAsString() << ")"; -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp deleted file mode 100644 index ea51a75..0000000 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" - -namespace clang::lifetimes::internal { - -void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { - OS << OID << " ("; - Origin O = getOrigin(OID); - if (const ValueDecl *VD = O.getDecl()) - OS << "Decl: " << VD->getNameAsString(); - else if (const Expr *E = O.getExpr()) - OS << "Expr: " << E->getStmtClassName(); - else - OS << "Unknown"; - OS << ")"; -} - -Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); -} - -Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); -} - -// TODO: Mark this method as const once we remove the call to getOrCreate. 
-OriginID OriginManager::get(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - // If the expression itself has no specific origin, and it's a reference - // to a declaration, its origin is that of the declaration it refers to. - // For pointer types, where we don't pre-emptively create an origin for the - // DeclRefExpr itself. - if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) - return get(*DRE->getDecl()); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - return getOrCreate(E); -} - -OriginID OriginManager::get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; -} - -OriginID OriginManager::getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; -} - -const Origin &OriginManager::getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; -} - -OriginID OriginManager::getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index d9aafc6..b7e8bad 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -704,6 +704,10 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::Emscripten: return std::make_unique<EmscriptenTargetInfo<WebAssembly32TargetInfo>>( Triple, Opts); + + case llvm::Triple::Linux: + return std::make_unique<WALITargetInfo<WebAssembly32TargetInfo>>(Triple, + Opts); case llvm::Triple::UnknownOS: return std::make_unique<WebAssemblyOSTargetInfo<WebAssembly32TargetInfo>>( Triple, Opts); diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 6c49a09..bd6ffcf 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -948,6 +948,23 @@ public: using WebAssemblyOSTargetInfo<Target>::WebAssemblyOSTargetInfo; }; +// WALI target +template <typename Target> +class LLVM_LIBRARY_VISIBILITY WALITargetInfo + : public WebAssemblyOSTargetInfo<Target> { + void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, + MacroBuilder &Builder) const final { + WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder); + // Linux defines; list based off of gcc output + DefineStd(Builder, "unix", Opts); + DefineStd(Builder, "linux", Opts); + Builder.defineMacro("__wali__"); + } + +public: + using WebAssemblyOSTargetInfo<Target>::WebAssemblyOSTargetInfo; +}; + // Emscripten target template <typename Target> class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index eba7422..4de6ce6 100644 --- 
a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -88,12 +88,23 @@ public: LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; - // size_t being unsigned long for both wasm32 and wasm64 makes mangled names - // more consistent between the two. - SizeType = UnsignedLong; - PtrDiffType = SignedLong; - IntPtrType = SignedLong; HasUnalignedAccess = true; + if (T.isWALI()) { + // The WALI ABI is documented here: + // https://doc.rust-lang.org/rustc/platform-support/wasm32-wali-linux.html + // Currently, this ABI only applies to wasm32 targets and notably requires + // 64-bit longs + LongAlign = LongWidth = 64; + SizeType = UnsignedInt; + PtrDiffType = SignedInt; + IntPtrType = SignedInt; + } else { + // size_t being unsigned long for both wasm32 and wasm64 makes mangled + // names more consistent between the two. + SizeType = UnsignedLong; + PtrDiffType = SignedLong; + IntPtrType = SignedLong; + } } StringRef getABI() const override; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 25afe8b..a6f10e6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -319,12 +319,6 @@ public: return cir::ConstantOp::create(*this, loc, cir::IntAttr::get(sInt64Ty, c)); } - // Creates constant nullptr for pointer type ty. - cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) { - assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer()); - return cir::ConstantOp::create(*this, loc, getConstPtrAttr(ty, 0)); - } - mlir::Value createNeg(mlir::Value value) { if (auto intTy = mlir::dyn_cast<cir::IntType>(value.getType())) { diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp index 6453843..f9ff37b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenException.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp @@ -64,3 +64,11 @@ void CIRGenFunction::emitAnyExprToExn(const Expr *e, Address addr) { // Deactivate the cleanup block. assert(!cir::MissingFeatures::ehCleanupScope()); } + +mlir::LogicalResult CIRGenFunction::emitCXXTryStmt(const CXXTryStmt &s) { + if (s.getTryBlock()->body_empty()) + return mlir::LogicalResult::success(); + + cgm.errorNYI("exitCXXTryStmt: CXXTryStmt with non-empty body"); + return mlir::LogicalResult::success(); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 7a606ee..d71de2f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1297,6 +1297,8 @@ public: void emitCXXThrowExpr(const CXXThrowExpr *e); + mlir::LogicalResult emitCXXTryStmt(const clang::CXXTryStmt &s); + void emitCtorPrologue(const clang::CXXConstructorDecl *ctor, clang::CXXCtorType ctorType, FunctionArgList &args); diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp index 4cf2237..5ba6bcb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp @@ -73,7 +73,7 @@ CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) { // Array sections are special, and we have to treat them that way. 
if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr->IgnoreParenImpCasts())) - origType = ArraySectionExpr::getBaseOriginalType(section); + origType = section->getElementType(); mlir::Location exprLoc = cgm.getLoc(curVarExpr->getBeginLoc()); llvm::SmallVector<mlir::Value> bounds; @@ -84,16 +84,10 @@ CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) { e->printPretty(os, nullptr, getContext().getPrintingPolicy()); auto addBoundType = [&](const Expr *e) { - if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) { - QualType baseTy = ArraySectionExpr::getBaseOriginalType( - section->getBase()->IgnoreParenImpCasts()); - if (auto *at = getContext().getAsArrayType(baseTy)) - boundTypes.push_back(at->getElementType()); - else - boundTypes.push_back(baseTy->getPointeeType()); - } else { + if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) + boundTypes.push_back(section->getElementType()); + else boundTypes.push_back(curVarExpr->getType()); - } }; addBoundType(curVarExpr); @@ -113,8 +107,7 @@ CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) { if (const Expr *len = section->getLength()) { extent = emitOpenACCIntExpr(len); } else { - QualType baseTy = ArraySectionExpr::getBaseOriginalType( - section->getBase()->IgnoreParenImpCasts()); + QualType baseTy = section->getBaseType(); // We know this is the case as implicit lengths are only allowed for // array types with a constant size, or a dependent size. AND since // we are codegen we know we're not dependent. diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 0b8f8bf..cfd48a2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -154,6 +154,8 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, return emitWhileStmt(cast<WhileStmt>(*s)); case Stmt::DoStmtClass: return emitDoStmt(cast<DoStmt>(*s)); + case Stmt::CXXTryStmtClass: + return emitCXXTryStmt(cast<CXXTryStmt>(*s)); case Stmt::CXXForRangeStmtClass: return emitCXXForRangeStmt(cast<CXXForRangeStmt>(*s), attr); case Stmt::OpenACCComputeConstructClass: @@ -199,7 +201,6 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::CoroutineBodyStmtClass: return emitCoroutineBody(cast<CoroutineBodyStmt>(*s)); case Stmt::CoreturnStmtClass: - case Stmt::CXXTryStmtClass: case Stmt::IndirectGotoStmtClass: case Stmt::OMPParallelDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: diff --git a/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt index df7a1a3..3fc5b06 100644 --- a/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt @@ -4,6 +4,7 @@ add_clang_library(MLIRCIRTransforms FlattenCFG.cpp HoistAllocas.cpp LoweringPrepare.cpp + LoweringPrepareItaniumCXXABI.cpp GotoSolver.cpp DEPENDS diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 706e54f..dbff0b9 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "LoweringPrepareCXXABI.h" #include "PassDetail.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/Module.h" @@ -62,6 +63,7 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { void lowerComplexMulOp(cir::ComplexMulOp op); void lowerUnaryOp(cir::UnaryOp op); void 
lowerGlobalOp(cir::GlobalOp op); + void lowerDynamicCastOp(cir::DynamicCastOp op); void lowerArrayDtor(cir::ArrayDtor op); void lowerArrayCtor(cir::ArrayCtor op); @@ -91,6 +93,9 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { clang::ASTContext *astCtx; + // Helper for lowering C++ ABI specific operations. + std::shared_ptr<cir::LoweringPrepareCXXABI> cxxABI; + /// Tracks current module. mlir::ModuleOp mlirModule; @@ -101,7 +106,24 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { /// List of ctors and their priorities to be called before main() llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalCtorList; - void setASTContext(clang::ASTContext *c) { astCtx = c; } + void setASTContext(clang::ASTContext *c) { + astCtx = c; + switch (c->getCXXABIKind()) { + case clang::TargetCXXABI::GenericItanium: + // We'll need X86-specific support for handling vaargs lowering, but for + // now the Itanium ABI will work. + assert(!cir::MissingFeatures::loweringPrepareX86CXXABI()); + cxxABI.reset(cir::LoweringPrepareCXXABI::createItaniumABI()); + break; + case clang::TargetCXXABI::GenericAArch64: + case clang::TargetCXXABI::AppleARM64: + assert(!cir::MissingFeatures::loweringPrepareAArch64XXABI()); + cxxABI.reset(cir::LoweringPrepareCXXABI::createItaniumABI()); + break; + default: + llvm_unreachable("NYI"); + } + } }; } // namespace @@ -850,6 +872,17 @@ void LoweringPreparePass::buildCXXGlobalInitFunc() { cir::ReturnOp::create(builder, f.getLoc()); } +void LoweringPreparePass::lowerDynamicCastOp(DynamicCastOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + + assert(astCtx && "AST context is not available during lowering prepare"); + auto loweredValue = cxxABI->lowerDynamicCast(builder, *astCtx, op); + + op.replaceAllUsesWith(loweredValue); + op.erase(); +} + static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, @@ -954,6 +987,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { lowerComplexMulOp(complexMul); else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) lowerGlobalOp(glob); + else if (auto dynamicCast = mlir::dyn_cast<cir::DynamicCastOp>(op)) + lowerDynamicCastOp(dynamicCast); else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) lowerUnaryOp(unary); } @@ -967,8 +1002,8 @@ void LoweringPreparePass::runOnOperation() { op->walk([&](mlir::Operation *op) { if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, - cir::ComplexMulOp, cir::ComplexDivOp, cir::GlobalOp, - cir::UnaryOp>(op)) + cir::ComplexMulOp, cir::ComplexDivOp, cir::DynamicCastOp, + cir::GlobalOp, cir::UnaryOp>(op)) opsToTransform.push_back(op); }); diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h new file mode 100644 index 0000000..2582c332 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the LoweringPrepareCXXABI class, which is the base class +// for ABI specific functionalities that are required during LLVM lowering +// prepare. +// +//===----------------------------------------------------------------------===// + +#ifndef CIR_DIALECT_TRANSFORMS__LOWERINGPREPARECXXABI_H +#define CIR_DIALECT_TRANSFORMS__LOWERINGPREPARECXXABI_H + +#include "mlir/IR/Value.h" +#include "clang/AST/ASTContext.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +namespace cir { + +class LoweringPrepareCXXABI { +public: + static LoweringPrepareCXXABI *createItaniumABI(); + + virtual ~LoweringPrepareCXXABI() {} + + virtual mlir::Value lowerDynamicCast(CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + cir::DynamicCastOp op) = 0; +}; + +} // namespace cir + +#endif // CIR_DIALECT_TRANSFORMS__LOWERINGPREPARECXXABI_H diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareItaniumCXXABI.cpp new file mode 100644 index 0000000..7d3c711 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareItaniumCXXABI.cpp @@ -0,0 +1,126 @@ +//===--------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with +// LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------===// +// +// This file provides Itanium C++ ABI specific code +// that is used during LLVMIR lowering prepare. +// +//===--------------------------------------------------------------------===// + +#include "LoweringPrepareCXXABI.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Value.h" +#include "mlir/IR/ValueRange.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" + +class LoweringPrepareItaniumCXXABI : public cir::LoweringPrepareCXXABI { +public: + mlir::Value lowerDynamicCast(cir::CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + cir::DynamicCastOp op) override; +}; + +cir::LoweringPrepareCXXABI *cir::LoweringPrepareCXXABI::createItaniumABI() { + return new LoweringPrepareItaniumCXXABI(); +} + +static void buildBadCastCall(cir::CIRBaseBuilderTy &builder, mlir::Location loc, + mlir::FlatSymbolRefAttr badCastFuncRef) { + builder.createCallOp(loc, badCastFuncRef, cir::VoidType(), + mlir::ValueRange{}); + // TODO(cir): Set the 'noreturn' attribute on the function. 
+ assert(!cir::MissingFeatures::opFuncNoReturn()); + cir::UnreachableOp::create(builder, loc); + builder.clearInsertionPoint(); +} + +static mlir::Value +buildDynamicCastAfterNullCheck(cir::CIRBaseBuilderTy &builder, + cir::DynamicCastOp op) { + mlir::Location loc = op->getLoc(); + mlir::Value srcValue = op.getSrc(); + cir::DynamicCastInfoAttr castInfo = op.getInfo().value(); + + // TODO(cir): consider address space + assert(!cir::MissingFeatures::addressSpace()); + + mlir::Value srcPtr = builder.createBitcast(srcValue, builder.getVoidPtrTy()); + cir::ConstantOp srcRtti = builder.getConstant(loc, castInfo.getSrcRtti()); + cir::ConstantOp destRtti = builder.getConstant(loc, castInfo.getDestRtti()); + cir::ConstantOp offsetHint = + builder.getConstant(loc, castInfo.getOffsetHint()); + + mlir::FlatSymbolRefAttr dynCastFuncRef = castInfo.getRuntimeFunc(); + mlir::Value dynCastFuncArgs[4] = {srcPtr, srcRtti, destRtti, offsetHint}; + + mlir::Value castedPtr = + builder + .createCallOp(loc, dynCastFuncRef, builder.getVoidPtrTy(), + dynCastFuncArgs) + .getResult(); + + assert(mlir::isa<cir::PointerType>(castedPtr.getType()) && + "the return value of __dynamic_cast should be a ptr"); + + /// C++ [expr.dynamic.cast]p9: + /// A failed cast to reference type throws std::bad_cast + if (op.isRefCast()) { + // Emit a cir.if that checks the casted value. + mlir::Value castedValueIsNull = builder.createPtrIsNull(castedPtr); + builder.create<cir::IfOp>( + loc, castedValueIsNull, false, [&](mlir::OpBuilder &, mlir::Location) { + buildBadCastCall(builder, loc, castInfo.getBadCastFunc()); + }); + } + + // Note that castedPtr is a void*. Cast it to a pointer to the destination + // type before return. + return builder.createBitcast(castedPtr, op.getType()); +} + +static mlir::Value +buildDynamicCastToVoidAfterNullCheck(cir::CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + cir::DynamicCastOp op) { + llvm_unreachable("dynamic cast to void is NYI"); +} + +mlir::Value +LoweringPrepareItaniumCXXABI::lowerDynamicCast(cir::CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + cir::DynamicCastOp op) { + mlir::Location loc = op->getLoc(); + mlir::Value srcValue = op.getSrc(); + + assert(!cir::MissingFeatures::emitTypeCheck()); + + if (op.isRefCast()) + return buildDynamicCastAfterNullCheck(builder, op); + + mlir::Value srcValueIsNotNull = builder.createPtrToBoolCast(srcValue); + return builder + .create<cir::TernaryOp>( + loc, srcValueIsNotNull, + [&](mlir::OpBuilder &, mlir::Location) { + mlir::Value castedValue = + op.isCastToVoid() + ? 
buildDynamicCastToVoidAfterNullCheck(builder, astCtx, op) + : buildDynamicCastAfterNullCheck(builder, op); + builder.createYield(loc, castedValue); + }, + [&](mlir::OpBuilder &, mlir::Location) { + builder.createYield( + loc, builder.getNullPtr(op.getType(), loc).getResult()); + }) + .getResult(); +} diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index a1ecfc7..26e0ba9 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1739,7 +1739,6 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( const mlir::LLVM::Linkage linkage = convertLinkage(op.getLinkage()); const StringRef symbol = op.getSymName(); SmallVector<mlir::NamedAttribute> attributes; - mlir::SymbolRefAttr comdatAttr = getComdatAttr(op, rewriter); if (init.has_value()) { if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) { @@ -1771,6 +1770,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( } // Rewrite op. + mlir::SymbolRefAttr comdatAttr = getComdatAttr(op, rewriter); auto newOp = rewriter.replaceOpWithNewOp<mlir::LLVM::GlobalOp>( op, llvmType, isConst, linkage, symbol, init.value_or(mlir::Attribute()), alignment, addrSpace, isDsoLocal, isThreadLocal, comdatAttr, attributes); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 85a13357..40ea513 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6836,6 +6836,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args); else if (Target.isOHOSFamily()) TC = std::make_unique<toolchains::OHOS>(*this, Target, Args); + else if (Target.isWALI()) + TC = std::make_unique<toolchains::WebAssembly>(*this, Target, Args); else TC = std::make_unique<toolchains::Linux>(*this, Target, Args); break; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 9abaf79..e9ca8ce 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,7 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 0ebf56e..51e0ee1 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -111,7 +111,6 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis - clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index 8aebf53..e8a7ad3 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,7 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" diff --git a/clang/lib/Sema/SemaAMDGPU.cpp 
b/clang/lib/Sema/SemaAMDGPU.cpp index 45fe80d..e32f437 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "clang/Sema/SemaAMDGPU.h" +#include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/TargetBuiltins.h" -#include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/Sema.h" #include "llvm/Support/AMDGPUAddrSpace.h" diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 0d8d0fa..35cdfbf 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -17,7 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 4230ea7..01abc1f 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -166,6 +166,11 @@ static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, // This is disabled under C++; there are too many ways for this to fire in // contexts where the warning is a false positive, or where it is technically // correct but benign. + // + // WG14 N3622 removed the constraint entirely in C2y. It is left + // enabled in earlier language modes because this is a constraint in those + // language modes. But in C2y mode, we still want to issue the "incompatible + // with previous standards" diagnostic, too. if (S.getLangOpts().CPlusPlus) return; @@ -190,16 +195,17 @@ static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, // This last can give us false negatives, but it's better than warning on // wrappers for simple C library functions. const FunctionDecl *UsedFn = dyn_cast<FunctionDecl>(D); - bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc); - if (!DowngradeWarning && UsedFn) - DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr<ConstAttr>(); - - S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet - : diag::ext_internal_in_extern_inline) - << /*IsVar=*/!UsedFn << D; + unsigned DiagID; + if (S.getLangOpts().C2y) + DiagID = diag::warn_c2y_compat_internal_in_extern_inline; + else if ((UsedFn && (UsedFn->isInlined() || UsedFn->hasAttr<ConstAttr>())) || + S.getSourceManager().isInMainFile(Loc)) + DiagID = diag::ext_internal_in_extern_inline_quiet; + else + DiagID = diag::ext_internal_in_extern_inline; + S.Diag(Loc, DiagID) << /*IsVar=*/!UsedFn << D; S.MaybeSuggestAddingStaticToDecl(Current); - S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at) << D; } diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 4824b5a..f3969a9 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2759,7 +2759,7 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) { // Array sections are special, and we have to treat them that way.
if (const auto *ASE = dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) - VarTy = ArraySectionExpr::getBaseOriginalType(ASE); + VarTy = ASE->getElementType(); VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), @@ -2795,7 +2795,7 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) { // Array sections are special, and we have to treat them that way. if (const auto *ASE = dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) - VarTy = ArraySectionExpr::getBaseOriginalType(ASE); + VarTy = ASE->getElementType(); VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), @@ -2896,7 +2896,7 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe( // Array sections are special, and we have to treat them that way. if (const auto *ASE = dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) - VarTy = ArraySectionExpr::getBaseOriginalType(ASE); + VarTy = ASE->getElementType(); VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp index b086929..881e960 100644 --- a/clang/lib/Sema/SemaOpenACCClause.cpp +++ b/clang/lib/Sema/SemaOpenACCClause.cpp @@ -1915,51 +1915,34 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses, return Result; } -/// OpenACC 3.3 section 2.5.15: -/// At a mininmum, the supported data types include ... the numerical data types -/// in C, C++, and Fortran. -/// -/// If the reduction var is a composite variable, each -/// member of the composite variable must be a supported datatype for the -/// reduction operation. -ExprResult SemaOpenACC::CheckReductionVar(OpenACCDirectiveKind DirectiveKind, - OpenACCReductionOperator ReductionOp, - Expr *VarExpr) { - // For now, we only support 'scalar' types, or composites/arrays of scalar - // types. - VarExpr = VarExpr->IgnoreParenCasts(); +bool SemaOpenACC::CheckReductionVarType(Expr *VarExpr) { SourceLocation VarLoc = VarExpr->getBeginLoc(); SmallVector<PartialDiagnosticAt> Notes; - QualType CurType = VarExpr->getType(); - - // For array like things, the expression can either be an array element - // (subscript expr), array section, or array type. Peel those off, and add - // notes in case we find an illegal kind. We'll allow scalar or composite of - // scalars inside of this. - if (auto *ASE = dyn_cast<ArraySectionExpr>(VarExpr)) { - QualType BaseType = ArraySectionExpr::getBaseOriginalType(ASE); + // The standard isn't clear how many levels of 'array element' or 'subarray' + // are permitted, but we can handle as many as we need, so we'll strip them + // off here. This will result in CurType being the actual 'type' of the + // expression, which is what we are looking to check. + QualType CurType = isa<ArraySectionExpr>(VarExpr) + ? ArraySectionExpr::getBaseOriginalType(VarExpr) + : VarExpr->getType(); + + // This can happen when we have a dependent type in an array element that the + // above function has tried to 'unwrap'. Since this can only happen with + // dependence, just let it go. 
+ if (CurType.isNull()) + return false; - PartialDiagnostic PD = PDiag(diag::note_acc_reduction_array) - << diag::OACCReductionArray::Section << BaseType; - Notes.push_back({ASE->getBeginLoc(), PD}); - - CurType = getASTContext().getBaseElementType(BaseType); - } else if (auto *SubExpr = dyn_cast<ArraySubscriptExpr>(VarExpr)) { - // Array subscript already results in the type of the thing as its type, so - // there is no type to change here. - PartialDiagnostic PD = - PDiag(diag::note_acc_reduction_array) - << diag::OACCReductionArray::Subscript - << SubExpr->getBase()->IgnoreParenImpCasts()->getType(); - Notes.push_back({SubExpr->getBeginLoc(), PD}); - } else if (auto *AT = getASTContext().getAsArrayType(CurType)) { + // If we are still an array type, we allow 1 level of 'unpeeling' of the + // array. The standard isn't clear here whether this is allowed, but + // array-of-valid-things makes sense. + if (auto *AT = getASTContext().getAsArrayType(CurType)) { // If we're already the array type, peel off the array and leave the element // type. - CurType = getASTContext().getBaseElementType(AT); PartialDiagnostic PD = PDiag(diag::note_acc_reduction_array) << diag::OACCReductionArray::ArrayTy << CurType; Notes.push_back({VarLoc, PD}); + CurType = AT->getElementType(); } auto IsValidMemberOfComposite = [](QualType Ty) { @@ -1974,31 +1957,26 @@ ExprResult SemaOpenACC::CheckReductionVar(OpenACCDirectiveKind DirectiveKind, for (auto [Loc, PD] : Notes) Diag(Loc, PD); - Diag(VarLoc, diag::note_acc_reduction_type_summary); + return Diag(VarLoc, diag::note_acc_reduction_type_summary); }; // If the type is already scalar, or is dependent, just give up. if (IsValidMemberOfComposite(CurType)) { // Nothing to do here, is valid. } else if (auto *RD = CurType->getAsRecordDecl()) { - if (!RD->isStruct() && !RD->isClass()) { - EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) - << RD << diag::OACCReductionTy::NotClassStruct); - return ExprError(); - } + if (!RD->isStruct() && !RD->isClass()) + return EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) + << RD + << diag::OACCReductionTy::NotClassStruct); - if (!RD->isCompleteDefinition()) { - EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) - << RD << diag::OACCReductionTy::NotComplete); - return ExprError(); - } + if (!RD->isCompleteDefinition()) + return EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) + << RD << diag::OACCReductionTy::NotComplete); if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD); - CXXRD && !CXXRD->isAggregate()) { - EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) - << CXXRD << diag::OACCReductionTy::NotAgg); - return ExprError(); - } + CXXRD && !CXXRD->isAggregate()) + return EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) + << CXXRD << diag::OACCReductionTy::NotAgg); for (FieldDecl *FD : RD->fields()) { if (!IsValidMemberOfComposite(FD->getType())) { @@ -2007,17 +1985,37 @@ ExprResult SemaOpenACC::CheckReductionVar(OpenACCDirectiveKind DirectiveKind, << FD->getName() << RD->getName(); Notes.push_back({FD->getBeginLoc(), PD}); // TODO: member here.note_acc_reduction_member_of_composite - EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) - << FD->getType() - << diag::OACCReductionTy::MemberNotScalar); - return ExprError(); + return EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) + << FD->getType() + << diag::OACCReductionTy::MemberNotScalar); } } } else { - EmitDiags(VarLoc, PDiag(diag::err_acc_reduction_type) - << CurType << diag::OACCReductionTy::NotScalar); + return EmitDiags(VarLoc, 
PDiag(diag::err_acc_reduction_type) + << CurType + << diag::OACCReductionTy::NotScalar); } + return false; +} + +/// OpenACC 3.3 section 2.5.15: +/// At a mininmum, the supported data types include ... the numerical data types +/// in C, C++, and Fortran. +/// +/// If the reduction var is a composite variable, each +/// member of the composite variable must be a supported datatype for the +/// reduction operation. +ExprResult SemaOpenACC::CheckReductionVar(OpenACCDirectiveKind DirectiveKind, + OpenACCReductionOperator ReductionOp, + Expr *VarExpr) { + // For now, we only support 'scalar' types, or composites/arrays of scalar + // types. + VarExpr = VarExpr->IgnoreParenCasts(); + + if (CheckReductionVarType(VarExpr)) + return ExprError(); + // OpenACC3.3: 2.9.11: Reduction clauses on nested constructs for the same // reduction 'var' must have the same reduction operator. if (!VarExpr->isInstantiationDependent()) { diff --git a/clang/test/C/C2y/n3622.c b/clang/test/C/C2y/n3622.c new file mode 100644 index 0000000..95b92e8 --- /dev/null +++ b/clang/test/C/C2y/n3622.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -verify=good -pedantic -Wall -std=c2y %s +// RUN: %clang_cc1 -verify=compat,expected -pedantic -Wall -Wpre-c2y-compat -std=c2y %s +// RUN: %clang_cc1 -verify=ped,expected -pedantic -Wall -std=c23 %s +// RUN: %clang_cc1 -verify=ped,expected -pedantic -Wall -std=c17 %s +// good-no-diagnostics + +/* WG14 N3622: Clang 22 + * Allow calling static inline within extern inline + * + * This verifies that a constraint from previous standards is no longer + * triggered in C2y mode. The constraint is with calling a static function + * or using a static variable from an inline function with external linkage. + */ + +static void static_func(void) {} // expected-note {{declared here}} +static int static_var; // expected-note {{declared here}} + +extern inline void test(void) { + static_func(); /* ped-warning {{using static function 'static_func' in an inline function with external linkage is a C2y extension}} + compat-warning {{using static function 'static_func' in an inline function with external linkage is incompatible with standards before C2y}} + */ + static_var = 12; /* ped-warning {{using static variable 'static_var' in an inline function with external linkage is a C2y extension}} + compat-warning {{using static variable 'static_var' in an inline function with external linkage is incompatible with standards before C2y}} + */ +} diff --git a/clang/test/C/C2y/n3623.c b/clang/test/C/C2y/n3623.c index a557eda..5e004c1 100644 --- a/clang/test/C/C2y/n3623.c +++ b/clang/test/C/C2y/n3623.c @@ -64,7 +64,10 @@ int wmain(int, wchar_t *[], wchar_t *[]) {} // Invalid signatures. #if defined(INVALID1) inline int main(int, char *[]); /* invalid-error {{'main' is not allowed to be declared inline}} */ +#if !__is_target_os(darwin) +// This test doesn't make sense on Darwin where four arguments are allowed. 
int main(int, char *[], char *[], float); /* invalid-error {{too many parameters (4) for 'main': must be 0, 2, or 3}} */ +#endif float main(int); /* invalid-error {{'main' must return 'int'}} */ _Noreturn int main(int, char *[]); /* invalid-warning {{'main' is not allowed to be declared _Noreturn}} invalid-note {{remove '_Noreturn'}} diff --git a/clang/test/CIR/CodeGen/dynamic-cast.cpp b/clang/test/CIR/CodeGen/dynamic-cast.cpp index e5244b2..b493840 100644 --- a/clang/test/CIR/CodeGen/dynamic-cast.cpp +++ b/clang/test/CIR/CodeGen/dynamic-cast.cpp @@ -1,5 +1,11 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t.before.log // RUN: FileCheck %s --input-file=%t.before.log -check-prefix=CIR-BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2> %t.after.log +// RUN: FileCheck %s --input-file=%t.after.log -check-prefix=CIR-AFTER +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck %s --input-file=%t-cir.ll -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll -check-prefix=OGCG struct Base { virtual ~Base(); @@ -19,6 +25,46 @@ Derived *ptr_cast(Base *b) { // CIR-BEFORE: %{{.+}} = cir.dyn_cast ptr %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived // CIR-BEFORE: } +// CIR-AFTER: cir.func dso_local @_Z8ptr_castP4Base +// CIR-AFTER: %[[SRC:.*]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base> +// CIR-AFTER-NEXT: %[[SRC_IS_NOT_NULL:.*]] = cir.cast ptr_to_bool %[[SRC]] : !cir.ptr<!rec_Base> -> !cir.bool +// CIR-AFTER-NEXT: %{{.+}} = cir.ternary(%[[SRC_IS_NOT_NULL]], true { +// CIR-AFTER-NEXT: %[[SRC_VOID_PTR:.*]] = cir.cast bitcast %[[SRC]] : !cir.ptr<!rec_Base> -> !cir.ptr<!void> +// CIR-AFTER-NEXT: %[[BASE_RTTI:.*]] = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i> +// CIR-AFTER-NEXT: %[[DERIVED_RTTI:.*]] = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i> +// CIR-AFTER-NEXT: %[[HINT:.*]] = cir.const #cir.int<0> : !s64i +// CIR-AFTER-NEXT: %[[RT_CALL_RET:.*]] = cir.call @__dynamic_cast(%[[SRC_VOID_PTR]], %[[BASE_RTTI]], %[[DERIVED_RTTI]], %[[HINT]]) : (!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void> +// CIR-AFTER-NEXT: %[[CASTED:.*]] = cir.cast bitcast %[[RT_CALL_RET]] : !cir.ptr<!void> -> !cir.ptr<!rec_Derived> +// CIR-AFTER-NEXT: cir.yield %[[CASTED]] : !cir.ptr<!rec_Derived> +// CIR-AFTER-NEXT: }, false { +// CIR-AFTER-NEXT: %[[NULL_PTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived> +// CIR-AFTER-NEXT: cir.yield %[[NULL_PTR]] : !cir.ptr<!rec_Derived> +// CIR-AFTER-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_Derived> +// CIR-AFTER: } + +// LLVM: define {{.*}} @_Z8ptr_castP4Base +// LLVM: %[[IS_NOT_NULL:.*]] = icmp ne ptr %[[PTR:.*]], null +// LLVM: br i1 %[[IS_NOT_NULL]], label %[[NOT_NULL:.*]], label %[[NULL:.*]] +// LLVM: [[NOT_NULL]]: +// LLVM: %[[RESULT:.*]] = call ptr @__dynamic_cast(ptr %[[PTR]], ptr @_ZTI4Base, ptr @_ZTI7Derived, i64 0) +// LLVM: br label %[[DONE:.*]] +// LLVM: [[NULL]]: +// LLVM: br label %[[DONE]] +// LLVM: [[DONE]]: +// LLVM: %[[RET:.*]] = phi ptr [ null, %[[NULL]] ], [ %[[RESULT]], %[[NOT_NULL]] ] + +// OGCG: define {{.*}} @_Z8ptr_castP4Base +// OGCG: %[[IS_NULL:.*]] = icmp eq ptr 
%[[PTR:.*]], null +// OGCG: br i1 %[[IS_NULL]], label %[[NULL:.*]], label %[[NOT_NULL:.*]] +// OGCG: [[NOT_NULL]]: +// OGCG: %[[RESULT:.*]] = call ptr @__dynamic_cast(ptr %[[PTR]], ptr @_ZTI4Base, ptr @_ZTI7Derived, i64 0) +// OGCG: br label %[[DONE:.*]] +// OGCG: [[NULL]]: +// OGCG: br label %[[DONE]] +// OGCG: [[DONE]]: +// OGCG: %[[RET:.*]] = phi ptr [ %[[RESULT]], %[[NOT_NULL]] ], [ null, %[[NULL]] ] + + Derived &ref_cast(Base &b) { return dynamic_cast<Derived &>(b); } @@ -26,3 +72,32 @@ Derived &ref_cast(Base &b) { // CIR-BEFORE: cir.func dso_local @_Z8ref_castR4Base // CIR-BEFORE: %{{.+}} = cir.dyn_cast ref %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived // CIR-BEFORE: } + +// CIR-AFTER: cir.func dso_local @_Z8ref_castR4Base +// CIR-AFTER: %[[SRC_VOID_PTR:.*]] = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!void> +// CIR-AFTER-NEXT: %[[SRC_RTTI:.*]] = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i> +// CIR-AFTER-NEXT: %[[DEST_RTTI:.*]] = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i> +// CIR-AFTER-NEXT: %[[OFFSET_HINT:.*]] = cir.const #cir.int<0> : !s64i +// CIR-AFTER-NEXT: %[[CASTED_PTR:.*]] = cir.call @__dynamic_cast(%[[SRC_VOID_PTR]], %[[SRC_RTTI]], %[[DEST_RTTI]], %[[OFFSET_HINT]]) : (!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void> +// CIR-AFTER-NEXT: %[[NULL_PTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void> +// CIR-AFTER-NEXT: %[[CASTED_PTR_IS_NULL:.*]] = cir.cmp(eq, %[[CASTED_PTR]], %[[NULL_PTR]]) : !cir.ptr<!void>, !cir.bool +// CIR-AFTER-NEXT: cir.if %[[CASTED_PTR_IS_NULL]] { +// CIR-AFTER-NEXT: cir.call @__cxa_bad_cast() : () -> () +// CIR-AFTER-NEXT: cir.unreachable +// CIR-AFTER-NEXT: } +// CIR-AFTER-NEXT: %{{.+}} = cir.cast bitcast %[[CASTED_PTR]] : !cir.ptr<!void> -> !cir.ptr<!rec_Derived> +// CIR-AFTER: } + +// LLVM: define {{.*}} ptr @_Z8ref_castR4Base +// LLVM: %[[RESULT:.*]] = call ptr @__dynamic_cast(ptr %{{.*}}, ptr @_ZTI4Base, ptr @_ZTI7Derived, i64 0) +// LLVM: %[[IS_NULL:.*]] = icmp eq ptr %[[RESULT]], null +// LLVM: br i1 %[[IS_NULL]], label %[[BAD_CAST:.*]], label %[[DONE:.*]] +// LLVM: [[BAD_CAST]]: +// LLVM: call void @__cxa_bad_cast() + +// OGCG: define {{.*}} ptr @_Z8ref_castR4Base +// OGCG: %[[RESULT:.*]] = call ptr @__dynamic_cast(ptr %0, ptr @_ZTI4Base, ptr @_ZTI7Derived, i64 0) +// OGCG: %[[IS_NULL:.*]] = icmp eq ptr %[[RESULT]], null +// OGCG: br i1 %[[IS_NULL]], label %[[BAD_CAST:.*]], label %[[DONE:.*]] +// OGCG: [[BAD_CAST]]: +// OGCG: call void @__cxa_bad_cast() diff --git a/clang/test/CIR/CodeGen/try-catch.cpp b/clang/test/CIR/CodeGen/try-catch.cpp new file mode 100644 index 0000000..8f0b3c4 --- /dev/null +++ b/clang/test/CIR/CodeGen/try-catch.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +void empty_try_block_with_catch_all() { + try {} catch (...) 
{} +} + +// CIR: cir.func{{.*}} @_Z30empty_try_block_with_catch_allv() +// CIR: cir.return + +// LLVM: define{{.*}} void @_Z30empty_try_block_with_catch_allv() +// LLVM: ret void + +// OGCG: define{{.*}} void @_Z30empty_try_block_with_catch_allv() +// OGCG: ret void + +void empty_try_block_with_catch_with_int_exception() { + try {} catch (int e) {} +} + +// CIR: cir.func{{.*}} @_Z45empty_try_block_with_catch_with_int_exceptionv() +// CIR: cir.return + +// LLVM: define{{.*}} void @_Z45empty_try_block_with_catch_with_int_exceptionv() +// LLVM: ret void + +// OGCG: define{{.*}} void @_Z45empty_try_block_with_catch_with_int_exceptionv() +// OGCG: ret void diff --git a/clang/test/DebugInfo/KeyInstructions/flag.cpp b/clang/test/DebugInfo/KeyInstructions/flag.cpp index a5cd855..6aeeed6 100644 --- a/clang/test/DebugInfo/KeyInstructions/flag.cpp +++ b/clang/test/DebugInfo/KeyInstructions/flag.cpp @@ -1,12 +1,15 @@ // RUN: %clang -### -target x86_64 -c -gdwarf -gkey-instructions %s 2>&1 | FileCheck %s --check-prefixes=KEY-INSTRUCTIONS // RUN: %clang -### -target x86_64 -c -gdwarf -gno-key-instructions %s 2>&1 | FileCheck %s --check-prefixes=NO-KEY-INSTRUCTIONS +// RUN: %clang -### -target x86_64 -c -gno-key-instructions %s 2>&1 | FileCheck %s --check-prefixes=NO-DEBUG //// Help. // RUN %clang --help | FileCheck %s --check-prefix=HELP -// HELP: -gkey-instructions Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only. Implies -g. +// HELP: -gkey-instructions Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only. // KEY-INSTRUCTIONS: "-gkey-instructions" // NO-KEY-INSTRUCTIONS-NOT: key-instructions +// NO-DEBUG-NOT: debug-info-kind +// NO-DEBUG-NOT: dwarf //// Help hidden: flag should not be visible. 
// RUN: %clang --help | FileCheck %s --check-prefix=HELP diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c index 73f2f40..45ac450 100644 --- a/clang/test/Driver/debug-options.c +++ b/clang/test/Driver/debug-options.c @@ -268,11 +268,11 @@ // RUN: %clang -### -c %s 2>&1 | FileCheck -check-prefix=NORNGBSE %s // RUN: %clang -### -c -fdebug-ranges-base-address -fno-debug-ranges-base-address %s 2>&1 | FileCheck -check-prefix=NORNGBSE %s // -// RUN: %clang -### -c -gomit-unreferenced-methods -fno-standalone-debug %s 2>&1 | FileCheck -check-prefix=INCTYPES %s +// RUN: %clang -### -c -g -gomit-unreferenced-methods -fno-standalone-debug %s 2>&1 | FileCheck -check-prefix=INCTYPES %s // RUN: %clang -### -c %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s -// RUN: %clang -### -c -gomit-unreferenced-methods -fdebug-types-section -target x86_64-unknown-linux %s 2>&1 \ +// RUN: %clang -### -c -g -gomit-unreferenced-methods -fdebug-types-section -target x86_64-unknown-linux %s 2>&1 \ // RUN: | FileCheck -check-prefix=NOINCTYPES %s -// RUN: %clang -### -c -gomit-unreferenced-methods -fstandalone-debug %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s +// RUN: %clang -### -c -g -gomit-unreferenced-methods -fstandalone-debug %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s // // RUN: %clang -### -c -glldb %s 2>&1 | FileCheck -check-prefix=NOPUB %s // RUN: %clang -### -c -glldb -gno-pubnames %s 2>&1 | FileCheck -check-prefix=NOPUB %s diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 91803fe..29a94ae 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -296,3 +296,10 @@ // RUN: | FileCheck -check-prefix=LINK_WASIP2_USE_WASMLD %s // LINK_WASIP2_USE_WASMLD: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_WASIP2_USE_WASMLD: wasm-ld{{.*}}" "-m" "wasm32" {{.*}} "[[temp]]" {{.*}} + +// Basic `wasm32-linux-muslwali` compile-link test. 
+ +// RUN: %clang -### --target=wasm32-linux-muslwali --sysroot=/foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=LINK_WALI_BASIC %s +// LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" +// LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp index ba1c55b..d7ff76c 100644 --- a/clang/test/Driver/wasm-toolchain.cpp +++ b/clang/test/Driver/wasm-toolchain.cpp @@ -86,3 +86,28 @@ // COMPILE_STDCXX: "-internal-isystem" "[[RESOURCE_DIR]]{{(/|\\\\)}}include" // COMPILE_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-wasi" // COMPILE_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include" + +// RUN: %clangxx -### --target=wasm32-linux-muslwali --stdlib=libc++ %s 2>&1 \ +// RUN: --sysroot=%S/Inputs/basic_linux_libcxx_tree/usr \ +// RUN: | FileCheck -check-prefix=COMPILE_WALI %s +// COMPILE_WALI: "-cc1" +// COMPILE_WALI: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]" +// COMPILE_WALI: "-isysroot" "[[SYSROOT:[^"]+]]" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-linux-muslwali/c++/v1" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/v1" +// COMPILE_WALI: "-internal-isystem" "[[RESOURCE_DIR]]{{(/|\\\\)}}include" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-linux-muslwali" +// COMPILE_WALI: "-internal-isystem" "[[SYSROOT:[^"]+]]/include" + +// RUN: %clangxx -### --target=wasm32-linux-muslwali --stdlib=libstdc++ %s 2>&1 \ +// RUN: --sysroot=%S/Inputs/basic_linux_libstdcxx_libcxxv2_tree/usr \ +// RUN: | FileCheck -check-prefix=COMPILE_WALI_STDCXX %s +// COMPILE_WALI_STDCXX: "-cc1" +// COMPILE_WALI_STDCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]" +// COMPILE_WALI_STDCXX: "-isysroot" "[[SYSROOT:[^"]+]]" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8/wasm32-linux-muslwali" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/c++/4.8/backward" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[RESOURCE_DIR]]{{(/|\\\\)}}include" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include/wasm32-linux-muslwali" +// COMPILE_WALI_STDCXX: "-internal-isystem" "[[SYSROOT:[^"]+]]/include" diff --git a/clang/test/InstallAPI/project-header-only-args-visibility.test b/clang/test/InstallAPI/project-header-only-args-visibility.test new file mode 100644 index 0000000..0403487 --- /dev/null +++ b/clang/test/InstallAPI/project-header-only-args-visibility.test @@ -0,0 +1,69 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json + +; RUN: clang-installapi \ +; RUN: -target arm64-apple-macos26 -install_name @rpath/libfoo.dylib \ +; RUN: -current_version 1 -compatibility_version 1 \ +; RUN: -Xproject -fvisibility=hidden -I%t/usr/include \ +; RUN: -I%t -dynamiclib %t/inputs.json \ +; RUN: -o %t/output.tbd 2>&1 | FileCheck %s --allow-empty +; RUN: llvm-readtapi --compare %t/output.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty + +; CHECK-NOT: error +; CHECK-NOT: warning + +//--- usr/include/public.h +int foo(void); + +//--- project.h +int bar(void); + +//--- expected.tbd +{ + "main_library": { + "exported_symbols": [ + { + "text": { + "global": [ + "_foo" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + 
{ + "name": "@rpath/libfoo.dylib" + } + ], + "target_info": [ + { + "min_deployment": "26", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- inputs.json.in +{ + "headers": [ + { + "path" : "DSTROOT/usr/include/public.h", + "type" : "public" + }, + { + "path" : "DSTROOT/project.h", + "type" : "project" + } + ], + "version": "3" +} diff --git a/clang/test/Sema/inline.c b/clang/test/Sema/inline.c index 804c510..6361db8 100644 --- a/clang/test/Sema/inline.c +++ b/clang/test/Sema/inline.c @@ -11,14 +11,14 @@ static int staticFunction(void); // expected-note + {{'staticFunction' declared static struct { int x; } staticStruct; // expected-note + {{'staticStruct' declared here}} inline int useStatic (void) { // expected-note 3 {{use 'static' to give inline function 'useStatic' internal linkage}} - staticFunction(); // expected-warning{{static function 'staticFunction' is used in an inline function with external linkage}} - (void)staticStruct.x; // expected-warning{{static variable 'staticStruct' is used in an inline function with external linkage}} - return staticVar; // expected-warning{{static variable 'staticVar' is used in an inline function with external linkage}} + staticFunction(); // expected-warning{{using static function 'staticFunction' in an inline function with external linkage is a C2y extension}} + (void)staticStruct.x; // expected-warning{{using static variable 'staticStruct' in an inline function with external linkage is a C2y extension}} + return staticVar; // expected-warning{{using static variable 'staticVar' in an inline function with external linkage is a C2y extension}} } extern inline int useStaticFromExtern (void) { // no suggestions - staticFunction(); // expected-warning{{static function 'staticFunction' is used in an inline function with external linkage}} - return staticVar; // expected-warning{{static variable 'staticVar' is used in an inline function with external linkage}} + staticFunction(); // expected-warning{{using static function 'staticFunction' in an inline function with external linkage is a C2y extension}} + return staticVar; // expected-warning{{using static variable 'staticVar' in an inline function with external linkage is a C2y extension}} } static inline int useStaticFromStatic (void) { @@ -67,8 +67,8 @@ inline int useStaticMainFile (void) { #pragma clang diagnostic warning "-Wstatic-in-inline" inline int useStaticAgain (void) { // expected-note 2 {{use 'static' to give inline function 'useStaticAgain' internal linkage}} - staticFunction(); // expected-warning{{static function 'staticFunction' is used in an inline function with external linkage}} - return staticVar; // expected-warning{{static variable 'staticVar' is used in an inline function with external linkage}} + staticFunction(); // expected-warning{{using static function 'staticFunction' in an inline function with external linkage is a C2y extension}} + return staticVar; // expected-warning{{using static variable 'staticVar' in an inline function with external linkage is a C2y extension}} } #pragma clang diagnostic pop @@ -86,8 +86,8 @@ extern inline void defineStaticVarInExtern(void) { // Check behavior of line markers. 
# 1 "XXX.h" 1 inline int useStaticMainFileInLineMarker(void) { // expected-note 2 {{use 'static' to give inline function 'useStaticMainFileInLineMarker' internal linkage}} - staticFunction(); // expected-warning{{static function 'staticFunction' is used in an inline function with external linkage}} - return staticVar; // expected-warning{{static variable 'staticVar' is used in an inline function with external linkage}} + staticFunction(); // expected-warning{{using static function 'staticFunction' in an inline function with external linkage is a C2y extension}} + return staticVar; // expected-warning{{using static variable 'staticVar' in an inline function with external linkage is a C2y extension}} } # 100 "inline.c" 2 diff --git a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp index 5aa90bd..72d7e6b 100644 --- a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp @@ -166,19 +166,17 @@ void uses(unsigned Parm) { CompositeHasComposite CoCArr[5]; // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'CompositeHasComposite'}} + // expected-note@+3{{used as element type of array type 'CompositeHasComposite[5]'}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel loop reduction(+:CoCArr) for(int i = 0; i < 5; ++i); - // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'CompositeHasComposite[5]'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel loop reduction(+:CoCArr[3]) for(int i = 0; i < 5; ++i); - // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of sub-array type 'CompositeHasComposite'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel loop reduction(+:CoCArr[1:1]) diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c index 07cb498..265c498 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c @@ -72,8 +72,7 @@ void uses(unsigned Parm) { while (1); struct CompositeHasComposite ChCArray[5]; - // 
expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of sub-array type 'struct CompositeHasComposite'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(&: CoS, Array[I], ChCArray[0:I]) diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp index 9c2f3d9..edc67ce 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp @@ -91,19 +91,17 @@ void uses(unsigned Parm) { CompositeHasComposite CoCArr[5]; // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'CompositeHasComposite'}} + // expected-note@+3{{used as element type of array type 'CompositeHasComposite[5]'}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(+:CoCArr) while (1); - // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'CompositeHasComposite[5]'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(+:CoCArr[3]) while (1); - // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of sub-array type 'CompositeHasComposite'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(+:CoCArr[1:1]) @@ -121,7 +119,7 @@ void uses(unsigned Parm) { int *IPtrArr[5]; // expected-error@+3{{invalid type 'int *' used in OpenACC 'reduction' variable reference; type is not a scalar value, or array of scalars, or composite of scalars}} - // expected-note@+2{{used as element type of array type 'int *'}} + // expected-note@+2{{used as element type of array type 'int *[5]'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or 
element of scalar types}} #pragma acc parallel reduction(+:IPtrArr) while (1); @@ -136,7 +134,7 @@ void uses(unsigned Parm) { HasPtr HPArr[5]; // expected-error@+4{{invalid type 'int *' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'HasPtr'}} + // expected-note@+3{{used as element type of array type 'HasPtr[5]'}} // expected-note@#HASPTR{{used as field 'I' of composite 'HasPtr'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(+:HPArr) @@ -152,7 +150,7 @@ void uses(unsigned Parm) { #pragma acc parallel reduction(+:CplxI) while (1); // expected-error@+3{{invalid type '_Complex int' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+2{{used as element type of array type '_Complex int'}} + // expected-note@+2{{used as element type of array type '_Complex int[5]'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(+:CplxIArr) while (1); @@ -161,7 +159,7 @@ void uses(unsigned Parm) { #pragma acc parallel reduction(+:CplxF) while (1); // expected-error@+3{{invalid type '_Complex float' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+2{{used as element type of array type '_Complex float'}} + // expected-note@+2{{used as element type of array type '_Complex float[5]'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc parallel reduction(+:CplxFArr) while (1); @@ -242,6 +240,50 @@ void TemplUses(T Parm, U CoS, V ChC) { // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} #pragma acc parallel reduction(&: ChCPtr->COS) while (1); + + T ThreeDArray[3][4][5]; + + // expected-error@+3{{invalid type 'int[4][5]' used in OpenACC 'reduction' variable reference; type is not a scalar value}} + // expected-note@+2{{used as element type of array type 'int[3][4][5]'}} + // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} +#pragma acc parallel reduction(+:ThreeDArray) + while (1); + // expected-error@+3{{invalid type 'int[5]' used in OpenACC 'reduction' variable reference; type is not a scalar value}} + // expected-note@+2{{used as element type of array type 'int[4][5]'}} + // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} +#pragma acc parallel reduction(+:ThreeDArray[1:1]) + while (1); + // expected-error@+3{{invalid type 'int[5]' used in OpenACC 'reduction' variable reference; type is not a scalar value}} + // expected-note@+2{{used as element type of array type 'int[4][5]'}} + // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} +#pragma acc parallel reduction(+:ThreeDArray[1]) + while (1); + +#pragma acc parallel reduction(+:ThreeDArray[1:1][1]) + while (1); +#pragma acc parallel 
reduction(+:ThreeDArray[1:1][1:1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1][1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1][1:1]) + while (1); + +#pragma acc parallel reduction(+:ThreeDArray[1:1][1][1:1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1:1][1][1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1:1][1:1][1:1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1:1][1:1][1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1][1][1:1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1][1][1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1][1:1][1:1]) + while (1); +#pragma acc parallel reduction(+:ThreeDArray[1][1:1][1]) + while (1); } void inst() { diff --git a/clang/test/SemaOpenACC/loop-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/loop-construct-reduction-clause.cpp index 2a07c2c..f2dd928 100644 --- a/clang/test/SemaOpenACC/loop-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/loop-construct-reduction-clause.cpp @@ -153,19 +153,17 @@ void uses() { CompositeHasComposite CoCArr[5]; // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'CompositeHasComposite'}} + // expected-note@+3{{used as element type of array type 'CompositeHasComposite[5]'}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc loop reduction(+:CoCArr) for(int i = 0; i < 5; ++i); - // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of array type 'CompositeHasComposite[5]'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc loop reduction(+:CoCArr[3]) for(int i = 0; i < 5; ++i); - // expected-error@+4{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} - // expected-note@+3{{used as element type of sub-array type 'CompositeHasComposite'}} + // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} // expected-note@+1{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} #pragma acc loop reduction(+:CoCArr[1:1]) diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 0c05184..169b2d2 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" 
+#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" @@ -63,7 +63,7 @@ public: Analysis = std::make_unique<LifetimeSafetyAnalysis>(*AnalysisCtx, nullptr); Analysis->run(); - AnnotationToPointMap = Analysis->getFactManager().getTestPoints(); + AnnotationToPointMap = Analysis->getTestPoints(); } LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } @@ -98,11 +98,10 @@ public: auto *VD = findDecl<ValueDecl>(VarName); if (!VD) return std::nullopt; - // This assumes the OriginManager's `get` can find an existing origin. - // We might need a `find` method on OriginManager to avoid `getOrCreate` - // logic in a const-query context if that becomes an issue. - return const_cast<OriginManager &>(Analysis.getFactManager().getOriginMgr()) - .get(*VD); + auto OID = Analysis.getOriginIDForDecl(VD); + if (!OID) + ADD_FAILURE() << "Origin for '" << VarName << "' not found."; + return OID; } std::vector<LoanID> getLoansForVar(llvm::StringRef VarName) { @@ -111,10 +110,7 @@ public: ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; return {}; } - std::vector<LoanID> LID; - for (const Loan &L : Analysis.getFactManager().getLoanMgr().getLoans()) - if (L.Path.D == VD) - LID.push_back(L.ID); + std::vector<LoanID> LID = Analysis.getLoanIDForVar(VD); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; return {}; @@ -127,7 +123,7 @@ public: ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLoanPropagation().getLoans(OID, PP); + return Analysis.getLoansAtPoint(OID, PP); } std::optional<std::vector<std::pair<OriginID, LivenessKind>>> @@ -135,10 +131,7 @@ public: ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - std::vector<std::pair<OriginID, LivenessKind>> Result; - for (auto &[OID, Info] : Analysis.getLiveOrigins().getLiveOriginsAt(PP)) - Result.push_back({OID, Info.Kind}); - return Result; + return Analysis.getLiveOriginsAtPoint(PP); } private: diff --git a/clang/www/c_status.html b/clang/www/c_status.html index a6bcd4c..b803962 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -349,7 +349,7 @@ conformance.</p> <tr> <td>Allow calling static inline within extern inline</td> <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3622.txt">N3622</a></td> - <td class="unknown" align="center">Unknown</td> + <td class="unreleased" align="center">Clang 22</td> </tr> <tr> <td>Generic replacement (v. 2 of quasi-literals)</td> diff --git a/flang/docs/C++17.md b/flang/docs/C++17.md index b20364b..f36110a 100644 --- a/flang/docs/C++17.md +++ b/flang/docs/C++17.md @@ -153,3 +153,8 @@ signifies a successful recognition and absence denotes a failed parse. It is used in data structures in place of nullable pointers to avoid indirection as well as the possible confusion over whether a pointer is allowed to be null. + +`std::optional<bool>` is commonly used to denote a "tri-state" +logical value that might be unknown. +Please try to avoid implicit casts of `std::optional<bool>` to `bool`, +and use `.value_or(default)` or `.has_value()` instead as appropriate. diff --git a/flang/docs/C++style.md b/flang/docs/C++style.md index 0457913..cbb96f1 100644 --- a/flang/docs/C++style.md +++ b/flang/docs/C++style.md @@ -205,11 +205,13 @@ contents, and it is assumed that the contents are present, validate that assumption by using `x.value()` instead. 1. 
We use `c_str()` rather than `data()` when converting a `std::string` to a `const char *` when the result is expected to be NUL-terminated. -1. Avoid explicit comparisions of pointers to `nullptr` and tests of +1. Avoid explicit comparisons of pointers to `nullptr` and tests of presence of `optional<>` values with `.has_value()` in the predicate expressions of control flow statements, but prefer them to implicit conversions to `bool` when initializing `bool` variables and arguments, and to the use of the idiom `!!`. +(But please use `.has_value()` or `.value_or()` with `optional<bool>` +to avoid a common pitfall or the appearance of having fallen into it.) #### Classes 1. Define POD structures with `struct`. diff --git a/flang/include/flang/Evaluate/common.h b/flang/include/flang/Evaluate/common.h index fb800c6..0263f15 100644 --- a/flang/include/flang/Evaluate/common.h +++ b/flang/include/flang/Evaluate/common.h @@ -231,14 +231,20 @@ public: : messages_{that.messages_}, defaults_{that.defaults_}, intrinsics_{that.intrinsics_}, targetCharacteristics_{that.targetCharacteristics_}, - pdtInstance_{that.pdtInstance_}, impliedDos_{that.impliedDos_}, + pdtInstance_{that.pdtInstance_}, + analyzingPDTComponentKindSelector_{ + that.analyzingPDTComponentKindSelector_}, + impliedDos_{that.impliedDos_}, languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_} { } FoldingContext( const FoldingContext &that, const parser::ContextualMessages &m) : messages_{m}, defaults_{that.defaults_}, intrinsics_{that.intrinsics_}, targetCharacteristics_{that.targetCharacteristics_}, - pdtInstance_{that.pdtInstance_}, impliedDos_{that.impliedDos_}, + pdtInstance_{that.pdtInstance_}, + analyzingPDTComponentKindSelector_{ + that.analyzingPDTComponentKindSelector_}, + impliedDos_{that.impliedDos_}, languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_} { } @@ -248,6 +254,9 @@ public: return defaults_; } const semantics::DerivedTypeSpec *pdtInstance() const { return pdtInstance_; } + bool analyzingPDTComponentKindSelector() const { + return analyzingPDTComponentKindSelector_; + } const IntrinsicProcTable &intrinsics() const { return intrinsics_; } const TargetCharacteristics &targetCharacteristics() const { return targetCharacteristics_; @@ -290,6 +299,10 @@ public: return common::ScopedSet(pdtInstance_, nullptr); } + common::Restorer<bool> AnalyzingPDTComponentKindSelector() { + return common::ScopedSet(analyzingPDTComponentKindSelector_, true); + } + parser::CharBlock SaveTempName(std::string &&name) { return {*tempNames_.emplace(std::move(name)).first}; } @@ -300,6 +313,7 @@ private: const IntrinsicProcTable &intrinsics_; const TargetCharacteristics &targetCharacteristics_; const semantics::DerivedTypeSpec *pdtInstance_{nullptr}; + bool analyzingPDTComponentKindSelector_{false}; std::optional<parser::CharBlock> moduleFileName_; std::map<parser::CharBlock, ConstantSubscript> impliedDos_; const common::LanguageFeatureControl &languageFeatures_; diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 320f913..695221c 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -208,6 +208,7 @@ struct IntrinsicLibrary { fir::ExtendedValue genAssociated(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genAtand(mlir::Type, llvm::ArrayRef<mlir::Value>); + void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue 
genBesselJn(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genBesselYn(mlir::Type, diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h index 30f5dfd..95c97f2 100644 --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -535,6 +535,7 @@ public: return true; } void Post(const parser::ComponentDefStmt &) { inComponentDefStmt_ = false; } + bool Pre(const parser::KindSelector &) { return !inComponentDefStmt_; } bool Pre(const parser::Initialization &x) { // Default component initialization expressions (but not DATA-like ones // as in DEC STRUCTUREs) were already analyzed in name resolution diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 98e2a5f..77f567e 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -510,6 +510,11 @@ public: const std::list<SourceName> &componentNames() const { return componentNames_; } + const std::map<SourceName, const parser::Expr *> & + originalKindParameterMap() const { + return originalKindParameterMap_; + } + void add_originalKindParameter(SourceName, const parser::Expr *); // If this derived type extends another, locate the parent component's symbol. const Symbol *GetParentComponent(const Scope &) const; @@ -538,6 +543,8 @@ private: bool sequence_{false}; bool isDECStructure_{false}; bool isForwardReferenced_{false}; + std::map<SourceName, const parser::Expr *> originalKindParameterMap_; + friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const DerivedTypeDetails &); }; diff --git a/flang/include/flang/Semantics/type.h b/flang/include/flang/Semantics/type.h index 3bd638b..87583a0 100644 --- a/flang/include/flang/Semantics/type.h +++ b/flang/include/flang/Semantics/type.h @@ -120,7 +120,9 @@ public: bool IsEquivalentInInterface(const ParamValue &that) const { return (category_ == that.category_ && expr_.has_value() == that.expr_.has_value() && - (!expr_ || evaluate::AreEquivalentInInterface(*expr_, *that.expr_))); + (!expr_ || + evaluate::AreEquivalentInInterface(*expr_, *that.expr_) + .value_or(false))); } std::string AsFortran() const; diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp index 647eebaaa0..839717d 100644 --- a/flang/lib/Evaluate/check-expression.cpp +++ b/flang/lib/Evaluate/check-expression.cpp @@ -1304,10 +1304,12 @@ std::optional<bool> IsContiguous(const A &x, FoldingContext &context, std::optional<bool> IsContiguous(const ActualArgument &actual, FoldingContext &fc, bool namedConstantSectionsAreContiguous, bool firstDimensionStride1) { - auto *expr{actual.UnwrapExpr()}; - return expr && - IsContiguous( - *expr, fc, namedConstantSectionsAreContiguous, firstDimensionStride1); + if (auto *expr{actual.UnwrapExpr()}) { + return IsContiguous( + *expr, fc, namedConstantSectionsAreContiguous, firstDimensionStride1); + } else { + return std::nullopt; + } } template std::optional<bool> IsContiguous(const Expr<SomeType> &, diff --git a/flang/lib/Evaluate/fold-logical.cpp b/flang/lib/Evaluate/fold-logical.cpp index 449c316..457b2f6 100644 --- a/flang/lib/Evaluate/fold-logical.cpp +++ b/flang/lib/Evaluate/fold-logical.cpp @@ -799,22 +799,20 @@ Expr<Type<TypeCategory::Logical, KIND>> FoldIntrinsicFunction( } } else if (name == "is_contiguous") { if (args.at(0)) { - auto warnContiguous{[&]() { - if (auto source{args[0]->sourceLocation()}) { - context.Warn(common::UsageWarning::ConstantIsContiguous, 
*source, - "is_contiguous() is always true for named constants and subobjects of named constants"_warn_en_US); - } - }}; + std::optional<bool> knownContiguous; if (auto *expr{args[0]->UnwrapExpr()}) { - if (auto contiguous{IsContiguous(*expr, context)}) { - warnContiguous(); - return Expr<T>{*contiguous}; - } + knownContiguous = IsContiguous(*expr, context); } else if (auto *assumedType{args[0]->GetAssumedTypeDummy()}) { - if (auto contiguous{IsContiguous(*assumedType, context)}) { - warnContiguous(); - return Expr<T>{*contiguous}; + knownContiguous = IsContiguous(*assumedType, context); + } + if (knownContiguous) { + if (*knownContiguous) { + if (auto source{args[0]->sourceLocation()}) { + context.Warn(common::UsageWarning::ConstantIsContiguous, *source, + "is_contiguous() is always true for named constants and subobjects of named constants"_warn_en_US); + } } + return Expr<T>{*knownContiguous}; } } } else if (name == "is_iostat_end") { diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index fe679da..f204eef 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -2515,7 +2515,8 @@ std::optional<SpecificCall> IntrinsicInterface::Match( CHECK(kindDummyArg); CHECK(result.categorySet == CategorySet{*category}); if (kindArg) { - if (auto *expr{kindArg->UnwrapExpr()}) { + auto *expr{kindArg->UnwrapExpr()}; + if (expr) { CHECK(expr->Rank() == 0); if (auto code{ToInt64(Fold(context, common::Clone(*expr)))}) { if (context.targetCharacteristics().IsTypeEnabled( @@ -2529,8 +2530,16 @@ std::optional<SpecificCall> IntrinsicInterface::Match( } } } - messages.Say( - "'kind=' argument must be a constant scalar integer whose value is a supported kind for the intrinsic result type"_err_en_US); + if (context.analyzingPDTComponentKindSelector() && expr && + IsConstantExpr(*expr)) { + // Don't emit an error about a KIND= actual argument value when + // processing a kind selector in a PDT component declaration before + // it is instantiated, so long as it's a constant expression. + // It will be reanalyzed later during instantiation. 
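Several of the flang changes in this patch replace implicit `std::optional<bool>`-to-`bool` conversions with explicit `.value_or()` calls, in line with the tri-state guidance added to `flang/docs/C++17.md` and `flang/docs/C++style.md` above. A minimal standalone sketch of the pitfall that guidance targets; `IsKnownContiguous` is an invented stand-in for illustration, not a flang API:

```cpp
#include <iostream>
#include <optional>

// Invented tri-state query: true/false when the answer is known,
// std::nullopt when it cannot be determined.
std::optional<bool> IsKnownContiguous(bool known, bool value) {
  if (!known)
    return std::nullopt;
  return value;
}

int main() {
  std::optional<bool> answer{IsKnownContiguous(/*known=*/true, /*value=*/false)};

  // Pitfall: this tests "has a value", not "the value is true", so the
  // branch is taken even though the known answer is false.
  if (answer)
    std::cout << "treated as contiguous (misleading)\n";

  // Preferred: state explicitly how the unknown case should be treated.
  if (answer.value_or(false))
    std::cout << "known to be contiguous\n";
  if (!answer.value_or(true))
    std::cout << "known to be discontiguous\n";
  return 0;
}
```

The same idiom appears in the `check-call.cpp`, `check-declarations.cpp`, and `check-omp-structure.cpp` hunks below, where the chosen default (`false` or `true`) encodes how an unknown answer is handled.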
+ } else { + messages.Say( + "'kind=' argument must be a constant scalar integer whose value is a supported kind for the intrinsic result type"_err_en_US); + } // use default kind below for error recovery } else if (kindDummyArg->flags.test(ArgFlag::defaultsToSameKind)) { CHECK(sameArg); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index de7694f..2c21868 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -346,6 +346,10 @@ static constexpr IntrinsicHandler handlers[]{ &I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>, {{{"mask", asValue}, {"pred", asValue}}}, /*isElemental=*/false}, + {"barrier_init", + &I::genBarrierInit, + {{{"barrier", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, {"bessel_jn", &I::genBesselJn, {{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}}, @@ -3176,6 +3180,22 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType, return fir::runtime::genAssociated(builder, loc, pointerBox, targetBox); } +// BARRIER_INIT (CUDA) +void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 2); + auto llvmPtr = fir::ConvertOp::create( + builder, loc, mlir::LLVM::LLVMPointerType::get(builder.getContext()), + fir::getBase(args[0])); + auto addrCast = mlir::LLVM::AddrSpaceCastOp::create( + builder, loc, + mlir::LLVM::LLVMPointerType::get( + builder.getContext(), + static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared)), + llvmPtr); + mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, addrCast, + fir::getBase(args[1]), {}); +} + // BESSEL_JN fir::ExtendedValue IntrinsicLibrary::genBesselJn(mlir::Type resultType, diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp index 03952da9..265e268 100644 --- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -2383,7 +2383,7 @@ PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType, auto context{builder.getContext()}; auto argBases{getBasesForArgs(args)}; - mlir::vector::SplatOp splatOp{nullptr}; + mlir::vector::BroadcastOp splatOp{nullptr}; mlir::Type retTy{nullptr}; switch (vop) { case VecOp::Splat: { @@ -2391,9 +2391,9 @@ PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType, auto vecTyInfo{getVecTypeFromFir(argBases[0])}; auto extractOp{genVecExtract(resultType, args)}; - splatOp = - mlir::vector::SplatOp::create(builder, loc, *(extractOp.getUnboxed()), - vecTyInfo.toMlirVectorType(context)); + splatOp = mlir::vector::BroadcastOp::create( + builder, loc, vecTyInfo.toMlirVectorType(context), + *(extractOp.getUnboxed())); retTy = vecTyInfo.toFirVectorType(); break; } @@ -2401,8 +2401,8 @@ PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType, assert(args.size() == 1); auto vecTyInfo{getVecTypeFromEle(argBases[0])}; - splatOp = mlir::vector::SplatOp::create( - builder, loc, argBases[0], vecTyInfo.toMlirVectorType(context)); + splatOp = mlir::vector::BroadcastOp::create( + builder, loc, vecTyInfo.toMlirVectorType(context), argBases[0]); retTy = vecTyInfo.toFirVectorType(); break; } @@ -2412,8 +2412,8 @@ PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType, auto intOp{builder.createConvert(loc, eleTy, argBases[0])}; // the intrinsic always returns vector(integer(4)) - splatOp = mlir::vector::SplatOp::create(builder, loc, intOp, - mlir::VectorType::get(4, eleTy)); + splatOp = mlir::vector::BroadcastOp::create( + builder, loc, 
mlir::VectorType::get(4, eleTy), intOp); retTy = fir::VectorType::get(4, eleTy); break; } @@ -2444,7 +2444,8 @@ PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType, auto addrConv{fir::ConvertOp::create(builder, loc, i64RefTy, addr)}; auto addrVal{fir::LoadOp::create(builder, loc, addrConv)}; - auto splatRes{mlir::vector::SplatOp::create(builder, loc, addrVal, i64VecTy)}; + auto splatRes{ + mlir::vector::BroadcastOp::create(builder, loc, i64VecTy, addrVal)}; mlir::Value result{nullptr}; if (mlirTy != splatRes.getType()) { diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 81c53aa..e4d2a0d 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -185,7 +185,8 @@ static void CheckCharacterActual(evaluate::Expr<evaluate::SomeType> &actual, } else if (static_cast<std::size_t>(actualOffset->offset()) >= actualOffset->symbol().size() || !evaluate::IsContiguous( - actualOffset->symbol(), foldingContext)) { + actualOffset->symbol(), foldingContext) + .value_or(false)) { // If substring, take rest of substring if (*actualLength > 0) { actualChars -= @@ -598,7 +599,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, context.IsEnabled( common::LanguageFeature::ContiguousOkForSeqAssociation) && actualLastSymbol && - evaluate::IsContiguous(*actualLastSymbol, foldingContext)}; + evaluate::IsContiguous(*actualLastSymbol, foldingContext) + .value_or(false)}; if (actualIsArrayElement && actualLastSymbol && !dummy.ignoreTKR.test(common::IgnoreTKR::Contiguous)) { if (IsPointer(*actualLastSymbol)) { @@ -663,7 +665,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } else if (static_cast<std::size_t>(actualOffset->offset()) >= actualOffset->symbol().size() || !evaluate::IsContiguous( - actualOffset->symbol(), foldingContext)) { + actualOffset->symbol(), foldingContext) + .value_or(false)) { actualElements = 1; } else if (auto actualSymType{evaluate::DynamicType::From( actualOffset->symbol())}) { @@ -1566,10 +1569,10 @@ static bool CheckElementalConformance(parser::ContextualMessages &messages, ") corresponding to dummy argument #" + std::to_string(index) + " ('" + dummy.name + "')"}; if (shape) { - auto tristate{evaluate::CheckConformance(messages, *shape, - *argShape, evaluate::CheckConformanceFlags::None, - shapeName.c_str(), argName.c_str())}; - if (tristate && !*tristate) { + if (!evaluate::CheckConformance(messages, *shape, *argShape, + evaluate::CheckConformanceFlags::None, shapeName.c_str(), + argName.c_str()) + .value_or(true)) { return false; } } else { diff --git a/flang/lib/Semantics/check-data.cpp b/flang/lib/Semantics/check-data.cpp index d6f1351..5459290 100644 --- a/flang/lib/Semantics/check-data.cpp +++ b/flang/lib/Semantics/check-data.cpp @@ -257,9 +257,7 @@ void DataChecker::Leave(const parser::DataStmtSet &set) { currentSetHasFatalErrors_ = false; } -// Handle legacy DATA-style initialization, e.g. REAL PI/3.14159/, for -// variables and components (esp. 
for DEC STRUCTUREs) -template <typename A> void DataChecker::LegacyDataInit(const A &decl) { +void DataChecker::Leave(const parser::EntityDecl &decl) { if (const auto &init{ std::get<std::optional<parser::Initialization>>(decl.t)}) { const Symbol *name{std::get<parser::Name>(decl.t).symbol}; @@ -272,14 +270,6 @@ template <typename A> void DataChecker::LegacyDataInit(const A &decl) { } } -void DataChecker::Leave(const parser::ComponentDecl &decl) { - LegacyDataInit(decl); -} - -void DataChecker::Leave(const parser::EntityDecl &decl) { - LegacyDataInit(decl); -} - void DataChecker::CompileDataInitializationsIntoInitializers() { ConvertToInitializers(inits_, exprAnalyzer_); } diff --git a/flang/lib/Semantics/check-data.h b/flang/lib/Semantics/check-data.h index 479d325..8cd2ac9 100644 --- a/flang/lib/Semantics/check-data.h +++ b/flang/lib/Semantics/check-data.h @@ -37,10 +37,7 @@ public: void Enter(const parser::DataImpliedDo &); void Leave(const parser::DataImpliedDo &); void Leave(const parser::DataStmtSet &); - // These cases are for legacy DATA-like /initializations/ - void Leave(const parser::ComponentDecl &); void Leave(const parser::EntityDecl &); - // After all DATA statements have been processed, converts their // initializations into per-symbol static initializers. void CompileDataInitializationsIntoInitializers(); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 7593424..ea5e2c0 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1984,9 +1984,8 @@ bool CheckHelper::CheckDistinguishableFinals(const Symbol &f1, const Procedure *p1{Characterize(f1)}; const Procedure *p2{Characterize(f2)}; if (p1 && p2) { - std::optional<bool> areDistinct{characteristics::Distinguishable( - context_.languageFeatures(), *p1, *p2)}; - if (areDistinct.value_or(false)) { + if (characteristics::Distinguishable(context_.languageFeatures(), *p1, *p2) + .value_or(false)) { return true; } if (auto *msg{messages_.Say(f1Name, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index c0c41c1..d65a89e 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -5085,7 +5085,7 @@ void OmpStructureChecker::CheckWorkdistributeBlockStmts( } void OmpStructureChecker::CheckIfContiguous(const parser::OmpObject &object) { - if (auto contig{IsContiguous(context_, object)}; contig && !*contig) { + if (!IsContiguous(context_, object).value_or(true)) { // known discontiguous const parser::Name *name{GetObjectName(object)}; assert(name && "Expecting name component"); context_.Say(name->source, diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index fc26888..2feec98 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2171,17 +2171,29 @@ MaybeExpr ExpressionAnalyzer::CheckStructureConstructor( // T(1) or T(PT=PT(1)). There may be multiple parent components. 
if (nextAnonymous == components.begin() && parentComponent && valueType && context().IsEnabled(LanguageFeature::AnonymousParents)) { - for (auto parent{components.begin()}; - parent != afterLastParentComponentIter; ++parent) { - if (auto parentType{DynamicType::From(*parent)}; parentType && - parent->test(Symbol::Flag::ParentComp) && - valueType->IsEquivalentTo(*parentType)) { - symbol = &*parent; - nextAnonymous = ++parent; - Warn(LanguageFeature::AnonymousParents, source, - "Whole parent component '%s' in structure constructor should not be anonymous"_port_en_US, - symbol->name()); - break; + auto parent{components.begin()}; + if (!parent->test(Symbol::Flag::ParentComp)) { + // Ensure that the first value can't initialize the first actual + // component. + if (auto firstComponentType{DynamicType::From(*parent)}) { + if (firstComponentType->IsTkCompatibleWith(*valueType) && + value.Rank() == parent->Rank()) { + parent = afterLastParentComponentIter; // skip next loop + } + } + } + for (; parent != afterLastParentComponentIter; ++parent) { + if (auto parentType{DynamicType::From(*parent)}) { + if (parent->test(Symbol::Flag::ParentComp) && + valueType->IsEquivalentTo(*parentType) && + value.Rank() == 0 /* scalar only */) { + symbol = &*parent; + nextAnonymous = ++parent; + Warn(LanguageFeature::AnonymousParents, source, + "Whole parent component '%s' in structure constructor should not be anonymous"_port_en_US, + symbol->name()); + break; + } } } } @@ -2317,7 +2329,7 @@ MaybeExpr ExpressionAnalyzer::CheckStructureConstructor( auto checked{CheckConformance(messages, *componentShape, *valueShape, CheckConformanceFlags::RightIsExpandableDeferred, "component", "value")}; - if (checked && *checked && GetRank(*componentShape) > 0 && + if (checked.value_or(false) && GetRank(*componentShape) > 0 && GetRank(*valueShape) == 0 && (IsDeferredShape(*symbol) || !IsExpandableScalar(*converted, foldingContext, diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index b7c7603d..86121880 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -7,6 +7,7 @@ #include "resolve-names.h" #include "assignment.h" +#include "data-to-inits.h" #include "definable.h" #include "mod-file.h" #include "pointer-assignment.h" @@ -357,6 +358,7 @@ protected: DeclTypeSpec::Category category{DeclTypeSpec::TypeDerived}; } derived; bool allowForwardReferenceToDerivedType{false}; + const parser::Expr *originalKindParameter{nullptr}; }; bool allowForwardReferenceToDerivedType() const { @@ -365,8 +367,10 @@ protected: void set_allowForwardReferenceToDerivedType(bool yes) { state_.allowForwardReferenceToDerivedType = yes; } + void set_inPDTDefinition(bool yes) { inPDTDefinition_ = yes; } - const DeclTypeSpec *GetDeclTypeSpec(); + const DeclTypeSpec *GetDeclTypeSpec() const; + const parser::Expr *GetOriginalKindParameter() const; void BeginDeclTypeSpec(); void EndDeclTypeSpec(); void SetDeclTypeSpec(const DeclTypeSpec &); @@ -380,6 +384,7 @@ protected: private: State state_; + bool inPDTDefinition_{false}; void MakeNumericType(TypeCategory, int kind); }; @@ -1081,8 +1086,12 @@ public: const parser::Name &, const parser::InitialDataTarget &); void PointerInitialization( const parser::Name &, const parser::ProcPointerInit &); + bool CheckNonPointerInitialization( + const parser::Name &, bool inLegacyDataInitialization); void NonPointerInitialization( const parser::Name &, const parser::ConstantExpr &); + void LegacyDataInitialization(const parser::Name &, + 
const std::list<common::Indirection<parser::DataStmtValue>> &values); void CheckExplicitInterface(const parser::Name &); void CheckBindings(const parser::TypeBoundProcedureStmt::WithoutInterface &); @@ -2454,9 +2463,12 @@ bool AttrsVisitor::Pre(const common::CUDADataAttr x) { // DeclTypeSpecVisitor implementation -const DeclTypeSpec *DeclTypeSpecVisitor::GetDeclTypeSpec() { +const DeclTypeSpec *DeclTypeSpecVisitor::GetDeclTypeSpec() const { return state_.declTypeSpec; } +const parser::Expr *DeclTypeSpecVisitor::GetOriginalKindParameter() const { + return state_.originalKindParameter; +} void DeclTypeSpecVisitor::BeginDeclTypeSpec() { CHECK(!state_.expectDeclTypeSpec); @@ -2541,6 +2553,21 @@ void DeclTypeSpecVisitor::SetDeclTypeSpec(const DeclTypeSpec &declTypeSpec) { KindExpr DeclTypeSpecVisitor::GetKindParamExpr( TypeCategory category, const std::optional<parser::KindSelector> &kind) { + if (inPDTDefinition_) { + if (category != TypeCategory::Derived && kind) { + if (const auto *expr{ + std::get_if<parser::ScalarIntConstantExpr>(&kind->u)}) { + CHECK(!state_.originalKindParameter); + // Save a pointer to the KIND= expression in the parse tree + // in case we need to reanalyze it during PDT instantiation. + state_.originalKindParameter = &expr->thing.thing.thing.value(); + } + } + // Inhibit some errors now that will be caught later during instantiations. + auto restorer{ + context().foldingContext().AnalyzingPDTComponentKindSelector()}; + return AnalyzeKindSelector(context(), category, kind); + } return AnalyzeKindSelector(context(), category, kind); } @@ -6410,6 +6437,7 @@ bool DeclarationVisitor::Pre(const parser::DerivedTypeDef &x) { details.set_isForwardReferenced(false); derivedTypeInfo_ = {}; PopScope(); + set_inPDTDefinition(false); return false; } @@ -6437,6 +6465,10 @@ void DeclarationVisitor::Post(const parser::DerivedTypeStmt &x) { // component without producing spurious errors about already // existing. const Symbol &extendsSymbol{extendsType->typeSymbol()}; + if (extendsSymbol.scope() && + extendsSymbol.scope()->IsParameterizedDerivedType()) { + set_inPDTDefinition(true); + } auto restorer{common::ScopedSet(extendsName->symbol, nullptr)}; if (OkToAddComponent(*extendsName, &extendsSymbol)) { auto &comp{DeclareEntity<ObjectEntityDetails>(*extendsName, Attrs{})}; @@ -6455,8 +6487,12 @@ void DeclarationVisitor::Post(const parser::DerivedTypeStmt &x) { } // Create symbols now for type parameters so that they shadow names // from the enclosing specification part. 
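The `GetKindParamExpr` change above raises the PDT-analysis flag only for the duration of kind-selector analysis by holding the restorer returned from `common::ScopedSet`. A simplified, self-contained sketch of that scoped-set idiom; the `ScopedSet` class below is a stand-in written for illustration, not flang's actual implementation:

```cpp
#include <iostream>
#include <utility>

// Simplified stand-in for flang's common::ScopedSet/Restorer: overwrite a
// variable for the lifetime of this object and restore it on destruction.
template <typename T> class ScopedSet {
public:
  ScopedSet(T &ref, T newValue) : ref_{ref}, saved_{std::move(ref)} {
    ref_ = std::move(newValue);
  }
  ~ScopedSet() { ref_ = std::move(saved_); }
  ScopedSet(const ScopedSet &) = delete;
  ScopedSet &operator=(const ScopedSet &) = delete;

private:
  T &ref_;
  T saved_;
};

bool analyzingKindSelector{false};

void AnalyzeKindSelector() {
  std::cout << "during analysis, flag=" << analyzingKindSelector << '\n';
}

int main() {
  {
    ScopedSet<bool> restorer{analyzingKindSelector, true};
    AnalyzeKindSelector(); // flag is true while the restorer is alive
  }
  std::cout << "after analysis, flag=" << analyzingKindSelector << '\n'; // false again
  return 0;
}
```

The value of the idiom is that the flag is restored on every exit path, including early returns, so it cannot leak into later analysis.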
+ const auto &paramNames{std::get<std::list<parser::Name>>(x.t)}; + if (!paramNames.empty()) { + set_inPDTDefinition(true); + } if (auto *details{symbol.detailsIf<DerivedTypeDetails>()}) { - for (const auto &name : std::get<std::list<parser::Name>>(x.t)) { + for (const auto &name : paramNames) { if (Symbol * symbol{MakeTypeSymbol(name, TypeParamDetails{})}) { details->add_paramNameOrder(*symbol); } @@ -6544,8 +6580,7 @@ void DeclarationVisitor::Post(const parser::ComponentDecl &x) { if (const auto *derived{declType->AsDerived()}) { if (!attrs.HasAny({Attr::POINTER, Attr::ALLOCATABLE})) { if (derivedTypeInfo_.type == &derived->typeSymbol()) { // C744 - Say("Recursive use of the derived type requires " - "POINTER or ALLOCATABLE"_err_en_US); + Say("Recursive use of the derived type requires POINTER or ALLOCATABLE"_err_en_US); } } } @@ -6558,7 +6593,11 @@ void DeclarationVisitor::Post(const parser::ComponentDecl &x) { Initialization(name, *init, /*inComponentDecl=*/true); } } - currScope().symbol()->get<DerivedTypeDetails>().add_component(symbol); + auto &details{currScope().symbol()->get<DerivedTypeDetails>()}; + details.add_component(symbol); + if (const parser::Expr *kindExpr{GetOriginalKindParameter()}) { + details.add_originalKindParameter(name.source, kindExpr); + } } ClearArraySpec(); ClearCoarraySpec(); @@ -8995,6 +9034,14 @@ void DeclarationVisitor::Initialization(const parser::Name &name, ultimate.set(Symbol::Flag::InDataStmt); } }, + [&](const std::list<Indirection<parser::DataStmtValue>> &values) { + Walk(values); + if (inComponentDecl) { + LegacyDataInitialization(name, values); + } else { + ultimate.set(Symbol::Flag::InDataStmt); + } + }, [&](const parser::NullInit &null) { // => NULL() Walk(null); if (auto nullInit{EvaluateExpr(null)}) { @@ -9028,11 +9075,6 @@ void DeclarationVisitor::Initialization(const parser::Name &name, ultimate.set(Symbol::Flag::InDataStmt); } }, - [&](const std::list<Indirection<parser::DataStmtValue>> &values) { - // Handled later in data-to-inits conversion - ultimate.set(Symbol::Flag::InDataStmt); - Walk(values); - }, }, init.u); } @@ -9103,36 +9145,82 @@ void DeclarationVisitor::PointerInitialization( } } -void DeclarationVisitor::NonPointerInitialization( - const parser::Name &name, const parser::ConstantExpr &expr) { +bool DeclarationVisitor::CheckNonPointerInitialization( + const parser::Name &name, bool inLegacyDataInitialization) { if (!context().HasError(name.symbol)) { Symbol &ultimate{name.symbol->GetUltimate()}; if (!context().HasError(ultimate)) { - if (IsPointer(ultimate)) { + if (IsPointer(ultimate) && !inLegacyDataInitialization) { Say(name, "'%s' is a pointer but is not initialized like one"_err_en_US); } else if (auto *details{ultimate.detailsIf<ObjectEntityDetails>()}) { if (details->init()) { SayWithDecl(name, *name.symbol, "'%s' has already been initialized"_err_en_US); - } else if (details->isCDefined()) { - context().Warn(common::UsageWarning::CdefinedInit, name.source, - "CDEFINED variable should not have an initializer"_warn_en_US); } else if (IsAllocatable(ultimate)) { Say(name, "Allocatable object '%s' cannot be initialized"_err_en_US); - } else if (ultimate.owner().IsParameterizedDerivedType()) { - // Save the expression for per-instantiation analysis. 
- details->set_unanalyzedPDTComponentInit(&expr.thing.value()); - } else if (MaybeExpr folded{EvaluateNonPointerInitializer( - ultimate, expr, expr.thing.value().source)}) { - details->set_init(std::move(*folded)); - ultimate.set(Symbol::Flag::InDataStmt, false); + } else { + if (details->isCDefined()) { + context().Warn(common::UsageWarning::CdefinedInit, name.source, + "CDEFINED variable should not have an initializer"_warn_en_US); + } + return true; } } else { Say(name, "'%s' is not an object that can be initialized"_err_en_US); } } } + return false; +} + +void DeclarationVisitor::NonPointerInitialization( + const parser::Name &name, const parser::ConstantExpr &expr) { + if (CheckNonPointerInitialization( + name, /*inLegacyDataInitialization=*/false)) { + Symbol &ultimate{name.symbol->GetUltimate()}; + auto &details{ultimate.get<ObjectEntityDetails>()}; + if (ultimate.owner().IsParameterizedDerivedType()) { + // Save the expression for per-instantiation analysis. + details.set_unanalyzedPDTComponentInit(&expr.thing.value()); + } else if (MaybeExpr folded{EvaluateNonPointerInitializer( + ultimate, expr, expr.thing.value().source)}) { + details.set_init(std::move(*folded)); + ultimate.set(Symbol::Flag::InDataStmt, false); + } + } +} + +void DeclarationVisitor::LegacyDataInitialization(const parser::Name &name, + const std::list<common::Indirection<parser::DataStmtValue>> &values) { + if (CheckNonPointerInitialization( + name, /*inLegacyDataInitialization=*/true)) { + Symbol &ultimate{name.symbol->GetUltimate()}; + if (ultimate.owner().IsParameterizedDerivedType()) { + Say(name, + "Component '%s' in a parameterized data type may not be initialized with a legacy DATA-style value list"_err_en_US, + name.source); + } else { + evaluate::ExpressionAnalyzer exprAnalyzer{context()}; + for (const auto &value : values) { + exprAnalyzer.Analyze(value.value()); + } + DataInitializations inits; + auto oldSize{ultimate.size()}; + if (auto chars{evaluate::characteristics::TypeAndShape::Characterize( + ultimate, GetFoldingContext())}) { + if (auto size{evaluate::ToInt64( + chars->MeasureSizeInBytes(GetFoldingContext()))}) { + // Temporarily set the byte size of the component so that we don't + // get bogus "initialization out of range" errors below. + ultimate.set_size(*size); + } + } + AccumulateDataInitializations(inits, exprAnalyzer, ultimate, values); + ConvertToInitializers(inits, exprAnalyzer); + ultimate.set_size(oldSize); + } + } } void ResolveNamesVisitor::HandleCall( @@ -10482,12 +10570,16 @@ private: if (const auto *target{ std::get_if<parser::InitialDataTarget>(&init->u)}) { resolver_.PointerInitialization(name, *target); - } else if (const auto *expr{ - std::get_if<parser::ConstantExpr>(&init->u)}) { - if (name.symbol) { - if (const auto *object{name.symbol->detailsIf<ObjectEntityDetails>()}; - !object || !object->init()) { + } else if (name.symbol) { + if (const auto *object{name.symbol->detailsIf<ObjectEntityDetails>()}; + !object || !object->init()) { + if (const auto *expr{std::get_if<parser::ConstantExpr>(&init->u)}) { resolver_.NonPointerInitialization(name, *expr); + } else { + // Don't check legacy DATA /initialization/ here. Component + // initializations will have already been handled, and variable + // initializations need to be done in DATA checking so that + // EQUIVALENCE storage association can be handled. 
} } } diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index 6152f61..69169469 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -769,6 +769,11 @@ void DerivedTypeDetails::add_component(const Symbol &symbol) { componentNames_.push_back(symbol.name()); } +void DerivedTypeDetails::add_originalKindParameter( + SourceName name, const parser::Expr *expr) { + originalKindParameterMap_.emplace(name, expr); +} + const Symbol *DerivedTypeDetails::GetParentComponent(const Scope &scope) const { if (auto extends{GetParentComponentName()}) { if (auto iter{scope.find(*extends)}; iter != scope.cend()) { diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp index 69e6ffa..dba15e6 100644 --- a/flang/lib/Semantics/type.cpp +++ b/flang/lib/Semantics/type.cpp @@ -443,9 +443,9 @@ void InstantiateHelper::InstantiateComponents(const Scope &fromScope) { // Walks a parsed expression to prepare it for (re)analysis; // clears out the typedExpr analysis results and re-resolves // symbol table pointers of type parameters. -class ComponentInitResetHelper { +class ResetHelper { public: - explicit ComponentInitResetHelper(Scope &scope) : scope_{scope} {} + explicit ResetHelper(Scope &scope) : scope_{scope} {} template <typename A> bool Pre(const A &) { return true; } @@ -498,7 +498,7 @@ void InstantiateHelper::InstantiateComponent(const Symbol &oldSymbol) { } if (const auto *parsedExpr{details->unanalyzedPDTComponentInit()}) { // Analyze the parsed expression in this PDT instantiation context. - ComponentInitResetHelper resetter{scope_}; + ResetHelper resetter{scope_}; parser::Walk(*parsedExpr, resetter); auto restorer{foldingContext().messages().SetLocation(newSymbol.name())}; details->set_init(evaluate::Fold( @@ -564,16 +564,44 @@ static ParamValue FoldCharacterLength(evaluate::FoldingContext &foldingContext, // Apply type parameter values to an intrinsic type spec. const DeclTypeSpec &InstantiateHelper::InstantiateIntrinsicType( SourceName symbolName, const DeclTypeSpec &spec) { + const parser::Expr *originalKindExpr{nullptr}; + if (const DerivedTypeSpec *derived{scope_.derivedTypeSpec()}) { + if (const auto *details{derived->originalTypeSymbol() + .GetUltimate() + .detailsIf<DerivedTypeDetails>()}) { + const auto &originalKindMap{details->originalKindParameterMap()}; + if (auto iter{originalKindMap.find(symbolName)}; + iter != originalKindMap.end()) { + originalKindExpr = iter->second; + } + } + } const IntrinsicTypeSpec &intrinsic{DEREF(spec.AsIntrinsic())}; - if (spec.category() != DeclTypeSpec::Character && + if (spec.category() != DeclTypeSpec::Character && !originalKindExpr && evaluate::IsActuallyConstant(intrinsic.kind())) { return spec; // KIND is already a known constant } // The expression was not originally constant, but now it must be so // in the context of a parameterized derived type instantiation. 
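The `InstantiateIntrinsicType` hunk above looks up the component's original KIND= parse-tree expression by source name via `originalKindParameterMap()` so it can be re-analyzed with the instantiation's parameter values. A toy sketch of that record-then-reanalyze bookkeeping follows; the `std::function`-based "saved expression" is purely illustrative, whereas the real code stores `parser::Expr` pointers and folds them into a `KindExpr`:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>

// Illustrative stand-in for a saved KIND= expression: given the value of a
// kind type parameter k, compute the component's kind.
using SavedKindExpr = std::function<int(int)>;

int main() {
  // Recorded once, while the parameterized derived type definition is processed.
  std::map<std::string, SavedKindExpr> originalKindParameterMap{
      {"j", [](int k) { return k; }},     // e.g. integer(kind=k) :: j
      {"x", [](int k) { return 2 * k; }}, // kind derived from k
  };

  // Re-evaluated for each instantiation of the type.
  for (int k : {4, 8}) {
    for (const auto &[component, expr] : originalKindParameterMap) {
      std::cout << "instantiation with k=" << k << ": component " << component
                << " gets kind " << expr(k) << '\n';
    }
  }
  return 0;
}
```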
- KindExpr copy{Fold(common::Clone(intrinsic.kind()))}; + std::optional<KindExpr> kindExpr; + if (originalKindExpr) { + ResetHelper resetter{scope_}; + parser::Walk(*originalKindExpr, resetter); + auto restorer{foldingContext().messages().DiscardMessages()}; + if (MaybeExpr analyzed{AnalyzeExpr(scope_.context(), *originalKindExpr)}) { + if (auto *intExpr{evaluate::UnwrapExpr<SomeIntExpr>(*analyzed)}) { + kindExpr = evaluate::ConvertToType<evaluate::SubscriptInteger>( + std::move(*intExpr)); + } + } + } + if (!kindExpr) { + kindExpr = KindExpr{intrinsic.kind()}; + CHECK(kindExpr.has_value()); + } + KindExpr folded{Fold(std::move(*kindExpr))}; int kind{context().GetDefaultKind(intrinsic.category())}; - if (auto value{evaluate::ToInt64(copy)}) { + if (auto value{evaluate::ToInt64(folded)}) { if (foldingContext().targetCharacteristics().IsTypeEnabled( intrinsic.category(), *value)) { kind = *value; @@ -586,7 +614,7 @@ const DeclTypeSpec &InstantiateHelper::InstantiateIntrinsicType( } else { std::string exprString; llvm::raw_string_ostream sstream(exprString); - copy.AsFortran(sstream); + folded.AsFortran(sstream); foldingContext().messages().Say(symbolName, "KIND parameter expression (%s) of intrinsic type %s did not resolve to a constant value"_err_en_US, exprString, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 1598c64..4f552dcf 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1987,6 +1987,13 @@ implicit none end function end interface + interface + attributes(device) subroutine barrier_init(barrier, count) + integer(8) :: barrier + integer(4) :: count + end subroutine + end interface + contains attributes(device) subroutine syncthreads() diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 5e1f6b6..cdb337b 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -392,28 +392,17 @@ end subroutine ! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1 ! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32 -! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldcg_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldcs_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldlu_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldcv_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) -! CHECK-DAG: func.func private @__ldca_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldcg_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldcs_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldlu_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldcv_i8x2_(!fir.ref<!fir.array<2xi64>>, !fir.ref<!fir.array<2xi64>>) -! CHECK-DAG: func.func private @__ldca_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldcg_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldcs_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! 
CHECK-DAG: func.func private @__ldlu_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldcv_r4x4_(!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>) -! CHECK-DAG: func.func private @__ldca_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldcg_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldcs_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldlu_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldcv_r2x2_(!fir.ref<!fir.array<2xf16>>, !fir.ref<!fir.array<2xf16>>) -! CHECK-DAG: func.func private @__ldca_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldcg_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldcs_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldlu_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) -! CHECK-DAG: func.func private @__ldcv_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>) +attributes(global) subroutine test_barrier() + integer(8), shared :: barrier + call barrier_init(barrier, 256) +end subroutine + + +! CHECK-LABEL: func.func @_QPtest_barrier() + +! CHECK: %[[SHARED:.*]] = cuf.shared_memory i64 {bindc_name = "barrier", uniq_name = "_QFtest_barrierEbarrier"} -> !fir.ref<i64> +! CHECK: %[[DECL_SHARED:.*]]:2 = hlfir.declare %[[SHARED]] {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_barrierEbarrier"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[COUNT:.*]] = arith.constant 256 : i32 +! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> +! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32 diff --git a/flang/test/Semantics/bug161989.f90 b/flang/test/Semantics/bug161989.f90 new file mode 100644 index 0000000..6185cde --- /dev/null +++ b/flang/test/Semantics/bug161989.f90 @@ -0,0 +1,28 @@ +!RUN: %python %S/test_errors.py %s %flang_fc1 +program test + real, target :: x + type t1 + integer :: j/1/ + real, pointer :: ap/x/ + end type + type, extends(t1) :: t2 + integer :: k/2/ + end type + type t3(k) + integer, kind :: k + !ERROR: Component 'j' in a parameterized data type may not be initialized with a legacy DATA-style value list + integer :: j/3/ + end type + type t4 + !ERROR: DATA statement set has more values than objects + integer j(1) /4, 5/ + end type + type t5 + integer uninitialized + end type + type(t2), parameter :: x2 = t2() !ok + integer(kind=merge(1,-1,x2%j==1)) tx2j + integer(kind=merge(2,-1,x2%k==2)) tx2k + !ERROR: Structure constructor lacks a value for component 'uninitialized' + type(t5), parameter :: x5 = t5() +end diff --git a/flang/test/Semantics/data21.f90 b/flang/test/Semantics/data21.f90 index 639f784..181ae44 100644 --- a/flang/test/Semantics/data21.f90 +++ b/flang/test/Semantics/data21.f90 @@ -1,6 +1,6 @@ ! RUN: %flang_fc1 -fdebug-dump-symbols %s 2>&1 | FileCheck %s ! Ensure that DATA-like default component initializers work. -! CHECK: j (InDataStmt) size=4 offset=0: ObjectEntity type: INTEGER(4) init:123_4 +! 
CHECK: j size=4 offset=0: ObjectEntity type: INTEGER(4) init:123_4 type t integer j/123/ end type diff --git a/flang/test/Semantics/kinds03.f90 b/flang/test/Semantics/kinds03.f90 index a15a4a9..ed915bd 100644 --- a/flang/test/Semantics/kinds03.f90 +++ b/flang/test/Semantics/kinds03.f90 @@ -5,7 +5,7 @@ type :: ipdt(k) !REF: /MainProgram1/ipdt/k integer, kind :: k - !REF: /MainProgram1/ipdt/k + !DEF: /MainProgram1/DerivedType9/k TypeParam INTEGER(4) !DEF: /MainProgram1/ipdt/x ObjectEntity INTEGER(int(int(k,kind=4),kind=8)) integer(kind=k) :: x end type ipdt @@ -14,7 +14,7 @@ type :: rpdt(k) !REF: /MainProgram1/rpdt/k integer, kind :: k - !REF: /MainProgram1/rpdt/k + !DEF: /MainProgram1/DerivedType13/k TypeParam INTEGER(4) !DEF: /MainProgram1/rpdt/x ObjectEntity REAL(int(int(k,kind=4),kind=8)) real(kind=k) :: x end type rpdt @@ -23,7 +23,7 @@ type :: zpdt(k) !REF: /MainProgram1/zpdt/k integer, kind :: k - !REF: /MainProgram1/zpdt/k + !DEF: /MainProgram1/DerivedType17/k TypeParam INTEGER(4) !DEF: /MainProgram1/zpdt/x ObjectEntity COMPLEX(int(int(k,kind=4),kind=8)) complex(kind=k) :: x end type zpdt @@ -32,7 +32,7 @@ type :: lpdt(k) !REF: /MainProgram1/lpdt/k integer, kind :: k - !REF: /MainProgram1/lpdt/k + !DEF: /MainProgram1/DerivedType21/k TypeParam INTEGER(4) !DEF: /MainProgram1/lpdt/x ObjectEntity LOGICAL(int(int(k,kind=4),kind=8)) logical(kind=k) :: x end type lpdt diff --git a/flang/test/Semantics/pdt05.f90 b/flang/test/Semantics/pdt05.f90 new file mode 100644 index 0000000..ec6b171 --- /dev/null +++ b/flang/test/Semantics/pdt05.f90 @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -fdebug-unparse %s | FileCheck %s + +module pdt05 + type base(k1,k2) + integer(1),kind :: k1 + integer(k1),kind :: k2 + integer(kind(int(k1,1)+int(k2,k1))) j + integer(kind(int(k1,1)+int(k2,kind(k2)))) k + end type +end + +use pdt05 +type(base(2,7)) x27 +type(base(8,7)) x87 +print *, 'x27%j', kind(x27%j) +print *, 'x27%k', kind(x27%k) +print *, 'x87%j', kind(x87%j) +print *, 'x87%k', kind(x87%k) +end + +!CHECK: PRINT *, "x27%j", 2_4 +!CHECK: PRINT *, "x27%k", 2_4 +!CHECK: PRINT *, "x87%j", 8_4 +!CHECK: PRINT *, "x87%k", 8_4 diff --git a/flang/test/Semantics/real10-x86-01.f90 b/flang/test/Semantics/real10-x86-01.f90 index ccaf34d..4215de3 100644 --- a/flang/test/Semantics/real10-x86-01.f90 +++ b/flang/test/Semantics/real10-x86-01.f90 @@ -6,7 +6,7 @@ type :: rpdt(k) !REF: /MainProgram1/rpdt/k integer, kind :: k - !REF: /MainProgram1/rpdt/k + !DEF: /MainProgram1/DerivedType3/k TypeParam INTEGER(4) !DEF: /MainProgram1/rpdt/x ObjectEntity REAL(int(int(k,kind=4),kind=8)) real(kind=k) :: x end type rpdt @@ -15,7 +15,7 @@ type :: zpdt(k) !REF: /MainProgram1/zpdt/k integer, kind :: k - !REF: /MainProgram1/zpdt/k + !DEF: /MainProgram1/DerivedType4/k TypeParam INTEGER(4) !DEF: /MainProgram1/zpdt/x ObjectEntity COMPLEX(int(int(k,kind=4),kind=8)) complex(kind=k) :: x end type zpdt diff --git a/flang/test/Semantics/structconst11.f90 b/flang/test/Semantics/structconst11.f90 new file mode 100644 index 0000000..8cf4e6a --- /dev/null +++ b/flang/test/Semantics/structconst11.f90 @@ -0,0 +1,89 @@ +!RUN: %flang_fc1 -fdebug-unparse %s | FileCheck %s +program test + + type t1p + type(t1p), pointer :: arr(:) + end type + type, extends(t1p) :: t1c + end type + type t2p + type(t2p), pointer :: scalar + end type + type, extends(t2p) :: t2c + end type + type t3p + type(t3p), allocatable :: arr(:) + end type + type, extends(t3p) :: t3c + end type + type t4p + type(t4p), allocatable :: scalar + end type + type, extends(t4p) :: t4c + end type + type t5p 
+ class(*), pointer :: arr(:) + end type + type, extends(t5p) :: t5c + end type + type t6p + class(*), pointer :: scalar + end type + type, extends(t6p) :: t6c + end type + type t7p + class(*), allocatable :: arr(:) + end type + type, extends(t7p) :: t7c + end type + type t8p + class(*), allocatable :: scalar + end type + type, extends(t8p) :: t8c + end type + + type(t1p), target :: t1pt(1) + type(t1p), pointer :: t1pp(:) + type(t2p), target :: t2pt + type(t2p), pointer :: t2pp + type(t3p) t3pa(1) + type(t4p) t4ps + + type(t1c) x1 + type(t2c) x2 + type(t3c) x3 + type(t4c) x4 + type(t5c) x5 + type(t6c) x6 + type(t7c) x7 + type(t8c) x8 + +!CHECK: x1=t1c(arr=t1pt) + x1 = t1c(t1pt) +!CHECK: x1=t1c(arr=t1pp) + x1 = t1c(t1pp) +!CHECK: x2=t2c(scalar=t2pt) + x2 = t2c(t2pt) +!CHECK: x2=t2c(scalar=t2pp) + x2 = t2c(t2pp) +!CHECK: x3=t3c(arr=t3pa) + x3 = t3c(t3pa) +!CHECK: x4=t4c(scalar=t4ps) + x4 = t4c(t4ps) +!CHECK: x4=t4c(scalar=t4p(scalar=NULL())) + x4 = t4c(t4p()) +!CHECK: x5=t5c(arr=t1pt) + x5 = t5c(t1pt) +!CHECK: x5=t5c(arr=t1pp) + x5 = t5c(t1pp) +!CHECK: x6=t6c(scalar=t2pt) + x6 = t6c(t2pt) +!CHECK: x6=t6c(scalar=t2pp) + x6 = t6c(t2pp) +!CHECK: x7=t7c(arr=t3pa) + x7 = t7c(t3pa) +!CHECK: x8=t8c(scalar=t4ps) + x8 = t8c(t4ps) +!CHECK: x8=t8c(scalar=t4p(scalar=NULL())) + x8 = t8c(t4p()) +end diff --git a/flang/test/Semantics/symbol17.f90 b/flang/test/Semantics/symbol17.f90 index a0d916e..f5f7222 100644 --- a/flang/test/Semantics/symbol17.f90 +++ b/flang/test/Semantics/symbol17.f90 @@ -79,7 +79,7 @@ type(fwdpdt(kind(0))) function f2(n) type :: fwdpdt(k) !REF: /f2/fwdpdt/k integer, kind :: k - !REF: /f2/fwdpdt/k + !DEF: /f2/DerivedType2/k TypeParam INTEGER(4) !DEF: /f2/fwdpdt/n ObjectEntity INTEGER(int(int(k,kind=4),kind=8)) integer(kind=k) :: n end type @@ -99,7 +99,7 @@ subroutine s2 (q1) type :: fwdpdt(k) !REF: /s2/fwdpdt/k integer, kind :: k - !REF: /s2/fwdpdt/k + !DEF: /s2/DerivedType2/k TypeParam INTEGER(4) !DEF: /s2/fwdpdt/n ObjectEntity INTEGER(int(int(k,kind=4),kind=8)) integer(kind=k) :: n end type @@ -110,31 +110,31 @@ end subroutine !DEF: /m1 Module module m1 !DEF: /m1/forward PRIVATE DerivedType - private :: forward + private :: forward !DEF: /m1/base PUBLIC DerivedType - type :: base + type :: base !REF: /m1/forward !DEF: /m1/base/p POINTER ObjectEntity CLASS(forward) - class(forward), pointer :: p - end type + class(forward), pointer :: p + end type !REF: /m1/base !REF: /m1/forward - type, extends(base) :: forward + type, extends(base) :: forward !DEF: /m1/forward/n ObjectEntity INTEGER(4) - integer :: n - end type - contains + integer :: n + end type +contains !DEF: /m1/test PUBLIC (Subroutine) Subprogram - subroutine test + subroutine test !REF: /m1/forward !DEF: /m1/test/object TARGET ObjectEntity TYPE(forward) - type(forward), target :: object + type(forward), target :: object !REF: /m1/test/object !REF: /m1/base/p - object%p => object + object%p => object !REF: /m1/test/object !REF: /m1/base/p !REF: /m1/forward/n - object%p%n = 666 - end subroutine + object%p%n = 666 + end subroutine end module diff --git a/flang/test/Semantics/type-parameter-constant.f90 b/flang/test/Semantics/type-parameter-constant.f90 index 376bffd..681012c 100644 --- a/flang/test/Semantics/type-parameter-constant.f90 +++ b/flang/test/Semantics/type-parameter-constant.f90 @@ -10,6 +10,6 @@ end type !ERROR: Value of KIND type parameter 'r' must be constant !WARNING: specification expression refers to local object 'six' (initialized and saved) [-Wsaved-local-in-spec-expr] !WARNING: specification expression 
refers to local object 'twenty_three' (initialized and saved) [-Wsaved-local-in-spec-expr] - type(a(six, twenty_three)) :: a2 + type(a(six, twenty_three)) :: a2 print *, a1%data%kind -end
\ No newline at end of file +end diff --git a/libcxx/docs/VendorDocumentation.rst b/libcxx/docs/VendorDocumentation.rst index aede8f9..7eba598 100644 --- a/libcxx/docs/VendorDocumentation.rst +++ b/libcxx/docs/VendorDocumentation.rst @@ -443,7 +443,7 @@ e.g. the ``mingw-w64-x86_64-clang`` package), together with CMake and ninja. -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ -DLLVM_ENABLE_LLD=ON \ - -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" \ + -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ -DLIBCXXABI_ENABLE_SHARED=OFF \ -DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON > ninja -C build cxx diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index e7fda65..d564ea6 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -103,7 +103,7 @@ steps: queue: libcxx-builders os: aix <<: *common - skip: "Until https://github.com/llvm/llvm-project/issues/162516 has been resolved" + skip: "https://github.com/llvm/llvm-project/issues/162516" - label: AIX (64-bit) command: libcxx/utils/ci/run-buildbot aix @@ -115,7 +115,7 @@ steps: queue: libcxx-builders os: aix <<: *common - skip: "Until https://github.com/llvm/llvm-project/issues/162516 has been resolved" + skip: "https://github.com/llvm/llvm-project/issues/162516" - group: ':freebsd: FreeBSD' steps: diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 1bcd205..1077d80 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#if !defined(__wasm__) + #include "assembly.h" #define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 @@ -20,7 +22,7 @@ .text #endif -#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) +#if !defined(__USING_SJLJ_EXCEPTIONS__) #if defined(__i386__) DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto) @@ -1253,7 +1255,8 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind19Registers_loongarch6jumptoEv) #endif -#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) */ +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ NO_EXEC_STACK_DIRECTIVE +#endif /* !defined(__wasm__) */ diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index 5139a55..8bf99eb 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#if !defined(__wasm__) + #include "assembly.h" #define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 @@ -20,7 +22,7 @@ .text #endif -#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) +#if !defined(__USING_SJLJ_EXCEPTIONS__) #if defined(__i386__) @@ -1232,6 +1234,8 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) WEAK_ALIAS(__unw_getcontext, unw_getcontext) #endif -#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) */ +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ NO_EXEC_STACK_DIRECTIVE + +#endif /* !defined(__wasm__) */ diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index bdd6f73..5d58abf 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -170,7 +170,19 @@ function(add_lldb_executable name) if(WIN32) list(FIND ARG_LINK_LIBS liblldb LIBLLDB_INDEX) if(NOT LIBLLDB_INDEX EQUAL -1) - target_link_options(${name} PRIVATE "/DELAYLOAD:$<TARGET_FILE_BASE_NAME:liblldb>.dll") + 
if (MSVC) + target_link_options(${name} PRIVATE "/DELAYLOAD:$<TARGET_FILE_NAME:liblldb>") + elseif (MINGW AND LINKER_IS_LLD) + # LLD can delay load just by passing a --delayload flag, as long as the import + # library is a short type import library (which LLD and MS link.exe produce). + # With ld.bfd, with long import libraries (as produced by GNU binutils), one + # has to generate a separate delayload import library with dlltool. + target_link_options(${name} PRIVATE "-Wl,--delayload=$<TARGET_FILE_NAME:liblldb>") + elseif (DEFINED LLDB_PYTHON_DLL_RELATIVE_PATH) + # If liblldb can't be delayloaded, then LLDB_PYTHON_DLL_RELATIVE_PATH will not + # have any effect. + message(WARNING "liblldb is not delay loaded, LLDB_PYTHON_DLL_RELATIVE_PATH has no effect") + endif() endif() endif() if(CLANG_LINK_CLANG_DYLIB) diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md index 36b95f1..93fb0c9 100644 --- a/lldb/docs/resources/lldbgdbremote.md +++ b/lldb/docs/resources/lldbgdbremote.md @@ -1,3 +1,6 @@ +<!-- Packets are listed in alpabetical order, and if in a section, alphabetical + order within that section. --> + # GDB Remote Protocol Extensions LLDB has added new GDB server packets to better support multi-threaded and @@ -2427,43 +2430,6 @@ Response is `F` plus the return value of `unlink()`, base 16 encoding. Return value may optionally be followed by a comma and the base16 value of errno if unlink failed. -## "x" - Binary memory read - -> **Warning:** The format of this packet was decided before GDB 16 -> introduced its own format for `x`. Future versions of LLDB may not -> support the format described here, and new code should produce and -> expect the format used by GDB. - -Like the `m` (read) and `M` (write) packets, this is a partner to the -`X` (write binary data) packet, `x`. - -It is called like -``` -xADDRESS,LENGTH -``` - -where both `ADDRESS` and `LENGTH` are big-endian base 16 values. - -To test if this packet is available, send a addr/len of 0: -``` -x0,0 -``` -You will get an `OK` response if it is supported. - -The reply will be the data requested in 8-bit binary data format. -The standard quoting is applied to the payload. Characters `} # $ *` -will all be escaped with `}` (`0x7d`) character and then XOR'ed with `0x20`. - -A typical use to read 512 bytes at 0x1000 would look like: -``` -x0x1000,0x200 -``` -The `0x` prefixes are optional - like most of the gdb-remote packets, -omitting them will work fine; these numbers are always base 16. - -The length of the payload is not provided. A reliable, 8-bit clean, -transport layer is assumed. - ## Wasm Packets The packet below are supported by the @@ -2530,3 +2496,40 @@ read packet: $e0030100#b9 **Priority to Implement:** Only required for Wasm support. Necessary to show variables. + +## "x" - Binary memory read + +> **Warning:** The format of this packet was decided before GDB 16 +> introduced its own format for `x`. Future versions of LLDB may not +> support the format described here, and new code should produce and +> expect the format used by GDB. + +Like the `m` (read) and `M` (write) packets, this is a partner to the +`X` (write binary data) packet, `x`. + +It is called like +``` +xADDRESS,LENGTH +``` + +where both `ADDRESS` and `LENGTH` are big-endian base 16 values. + +To test if this packet is available, send a addr/len of 0: +``` +x0,0 +``` +You will get an `OK` response if it is supported. + +The reply will be the data requested in 8-bit binary data format. 
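To make the reply handling above concrete, here is a minimal stand-alone sketch (not LLDB API; the helper name is invented) of undoing the standard gdb-remote quoting in an `x` reply payload, where `0x7d` introduces an escaped byte that must be XOR'ed with `0x20`:

```
// Hypothetical helper, not LLDB API: undo gdb-remote binary quoting in an
// `x` reply. An escape byte 0x7d ('}') is followed by the original byte
// XOR'ed with 0x20.
#include <cstdint>
#include <string>
#include <vector>

std::vector<uint8_t> DecodeBinaryReply(const std::string &Payload) {
  std::vector<uint8_t> Out;
  for (size_t I = 0; I < Payload.size(); ++I) {
    uint8_t Byte = static_cast<uint8_t>(Payload[I]);
    if (Byte == 0x7d && I + 1 < Payload.size()) {
      Out.push_back(static_cast<uint8_t>(Payload[++I]) ^ 0x20);
      continue;
    }
    Out.push_back(Byte);
  }
  return Out;
}
```

For example, a literal `}` byte (0x7d) in memory arrives on the wire as the two bytes `0x7d 0x5d`.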
+The standard quoting is applied to the payload. Characters `} # $ *` +will all be escaped with `}` (`0x7d`) character and then XOR'ed with `0x20`. + +A typical use to read 512 bytes at 0x1000 would look like: +``` +x0x1000,0x200 +``` +The `0x` prefixes are optional - like most of the gdb-remote packets, +omitting them will work fine; these numbers are always base 16. + +The length of the payload is not provided. A reliable, 8-bit clean, +transport layer is assumed. diff --git a/lldb/include/lldb/Host/JSONTransport.h b/lldb/include/lldb/Host/JSONTransport.h index 1453316..892821b 100644 --- a/lldb/include/lldb/Host/JSONTransport.h +++ b/lldb/include/lldb/Host/JSONTransport.h @@ -26,6 +26,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" +#include <atomic> #include <functional> #include <mutex> #include <optional> diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index e30d549..63f7df4 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -788,6 +788,10 @@ def canRunLibcxxTests(): return True, "libc++ always present" if platform == "linux": + if not configuration.libcxx_include_dir or not configuration.libcxx_library_dir: + return False, "API tests require a locally built libc++." + + # Make sure -stdlib=libc++ works since that's how the tests will be built. with temp_file.OnDiskTempFile() as f: cmd = [configuration.compiler, "-xc++", "-stdlib=libc++", "-o", f.path, "-"] p = subprocess.Popen( diff --git a/lldb/source/Commands/CommandObjectProtocolServer.cpp b/lldb/source/Commands/CommandObjectProtocolServer.cpp index c5ab9e9..1a95089 100644 --- a/lldb/source/Commands/CommandObjectProtocolServer.cpp +++ b/lldb/source/Commands/CommandObjectProtocolServer.cpp @@ -131,15 +131,57 @@ protected: } }; +class CommandObjectProtocolServerGet : public CommandObjectParsed { +public: + CommandObjectProtocolServerGet(CommandInterpreter &interpreter) + : CommandObjectParsed(interpreter, "protocol-server get", + "get protocol server connection information", + "protocol-server get <protocol>") { + AddSimpleArgumentList(lldb::eArgTypeProtocol, eArgRepeatPlain); + } + + ~CommandObjectProtocolServerGet() override = default; + +protected: + void DoExecute(Args &args, CommandReturnObject &result) override { + if (args.GetArgumentCount() < 1) { + result.AppendError("no protocol specified"); + return; + } + + llvm::StringRef protocol = args.GetArgumentAtIndex(0); + ProtocolServer *server = ProtocolServer::GetOrCreate(protocol); + if (!server) { + result.AppendErrorWithFormatv( + "unsupported protocol: {0}. 
Supported protocols are: {1}", protocol, + llvm::join(ProtocolServer::GetSupportedProtocols(), ", ")); + return; + } + + Socket *socket = server->GetSocket(); + if (!socket) { + result.AppendErrorWithFormatv("{0} server is not running", protocol); + return; + } + + std::string address = llvm::join(socket->GetListeningConnectionURI(), ", "); + result.AppendMessageWithFormatv("{0} server connection listeners: {1}", + protocol, address); + result.SetStatus(eReturnStatusSuccessFinishNoResult); + } +}; + CommandObjectProtocolServer::CommandObjectProtocolServer( CommandInterpreter &interpreter) : CommandObjectMultiword(interpreter, "protocol-server", - "Start and stop a protocol server.", + "Start, stop, and query protocol servers.", "protocol-server") { LoadSubCommand("start", CommandObjectSP(new CommandObjectProtocolServerStart( interpreter))); LoadSubCommand("stop", CommandObjectSP( new CommandObjectProtocolServerStop(interpreter))); + LoadSubCommand( + "get", CommandObjectSP(new CommandObjectProtocolServerGet(interpreter))); } CommandObjectProtocolServer::~CommandObjectProtocolServer() = default; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 3c49c91..6b121c9 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -74,6 +74,7 @@ #include "lldb/Core/Debugger.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/Module.h" +#include "lldb/Expression/DiagnosticManager.h" #include "lldb/Expression/IRExecutionUnit.h" #include "lldb/Expression/IRInterpreter.h" #include "lldb/Host/File.h" @@ -96,6 +97,7 @@ #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" #include "Plugins/Platform/MacOSX/PlatformDarwin.h" #include "lldb/Utility/XcodeSDK.h" +#include "lldb/lldb-enumerations.h" #include <cctype> #include <memory> @@ -527,7 +529,8 @@ static void SetupTargetOpts(CompilerInstance &compiler, static void SetupLangOpts(CompilerInstance &compiler, ExecutionContextScope &exe_scope, - const Expression &expr) { + const Expression &expr, + DiagnosticManager &diagnostic_manager) { Log *log = GetLog(LLDBLog::Expressions); // If the expression is being evaluated in the context of an existing stack @@ -547,6 +550,9 @@ static void SetupLangOpts(CompilerInstance &compiler, : lldb::eLanguageTypeUnknown), lldb_private::Language::GetNameForLanguageType(language)); + lldb::LanguageType language_for_note = language; + std::string language_fallback_reason; + LangOptions &lang_opts = compiler.getLangOpts(); switch (language) { @@ -560,6 +566,10 @@ static void SetupLangOpts(CompilerInstance &compiler, // family language, because the expression parser uses features of C++ to // capture values. lang_opts.CPlusPlus = true; + + language_for_note = lldb::eLanguageTypeC_plus_plus; + language_fallback_reason = + "Expression evaluation in pure C not supported. "; break; case lldb::eLanguageTypeObjC: lang_opts.ObjC = true; @@ -567,6 +577,10 @@ static void SetupLangOpts(CompilerInstance &compiler, // to "ask for ObjC, get ObjC++" (see comment above). lang_opts.CPlusPlus = true; + language_for_note = lldb::eLanguageTypeObjC_plus_plus; + language_fallback_reason = + "Expression evaluation in pure Objective-C not supported. 
"; + // Clang now sets as default C++14 as the default standard (with // GNU extensions), so we do the same here to avoid mismatches that // cause compiler error when evaluating expressions (e.g. nullptr not found @@ -607,9 +621,27 @@ static void SetupLangOpts(CompilerInstance &compiler, lang_opts.CPlusPlus = true; lang_opts.CPlusPlus11 = true; compiler.getHeaderSearchOpts().UseLibcxx = true; + + language_for_note = lldb::eLanguageTypeObjC_plus_plus; + if (language != language_for_note) { + if (language != lldb::eLanguageTypeUnknown) + language_fallback_reason = llvm::formatv( + "Expression evaluation in {0} not supported. ", + lldb_private::Language::GetDisplayNameForLanguageType(language)); + + language_fallback_reason += + llvm::formatv("Falling back to default language. "); + } break; } + diagnostic_manager.AddDiagnostic( + llvm::formatv("{0}Ran expression as '{1}'.", language_fallback_reason, + lldb_private::Language::GetDisplayNameForLanguageType( + language_for_note)) + .str(), + lldb::Severity::eSeverityInfo, DiagnosticOrigin::eDiagnosticOriginLLDB); + lang_opts.Bool = true; lang_opts.WChar = true; lang_opts.Blocks = true; @@ -687,8 +719,8 @@ static void SetupImportStdModuleLangOpts(CompilerInstance &compiler, ClangExpressionParser::ClangExpressionParser( ExecutionContextScope *exe_scope, Expression &expr, - bool generate_debug_info, std::vector<std::string> include_directories, - std::string filename) + bool generate_debug_info, DiagnosticManager &diagnostic_manager, + std::vector<std::string> include_directories, std::string filename) : ExpressionParser(exe_scope, expr, generate_debug_info), m_compiler(), m_pp_callbacks(nullptr), m_include_directories(std::move(include_directories)), @@ -754,7 +786,7 @@ ClangExpressionParser::ClangExpressionParser( } // 4. Set language options. - SetupLangOpts(*m_compiler, *exe_scope, expr); + SetupLangOpts(*m_compiler, *exe_scope, expr, diagnostic_manager); auto *clang_expr = dyn_cast<ClangUserExpression>(&m_expr); if (clang_expr && clang_expr->DidImportCxxModules()) { LLDB_LOG(log, "Adding lang options for importing C++ modules"); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.h index 93e0b007..734ad51 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.h @@ -65,6 +65,7 @@ public: /// diagnostics (i.e. errors, warnings or notes from Clang). 
ClangExpressionParser(ExecutionContextScope *exe_scope, Expression &expr, bool generate_debug_info, + DiagnosticManager &diagnostic_manager, std::vector<std::string> include_directories = {}, std::string filename = "<clang expression>"); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.cpp index e4a094f..d2db319 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.cpp @@ -189,8 +189,8 @@ ClangFunctionCaller::CompileFunction(lldb::ThreadSP thread_to_use_sp, lldb::ProcessSP jit_process_sp(m_jit_process_wp.lock()); if (jit_process_sp) { const bool generate_debug_info = true; - auto *clang_parser = new ClangExpressionParser(jit_process_sp.get(), *this, - generate_debug_info); + auto *clang_parser = new ClangExpressionParser( + jit_process_sp.get(), *this, generate_debug_info, diagnostic_manager); num_errors = clang_parser->Parse(diagnostic_manager); m_parser.reset(clang_parser); } else { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp index 6b743e2..e8d5ec3 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp @@ -574,7 +574,7 @@ bool ClangUserExpression::TryParse( m_parser = std::make_unique<ClangExpressionParser>( exe_ctx.GetBestExecutionContextScope(), *this, generate_debug_info, - m_include_directories, m_filename); + diagnostic_manager, m_include_directories, m_filename); unsigned num_errors = m_parser->Parse(diagnostic_manager); @@ -818,7 +818,7 @@ bool ClangUserExpression::Complete(ExecutionContext &exe_ctx, } ClangExpressionParser parser(exe_ctx.GetBestExecutionContextScope(), *this, - false); + false, diagnostic_manager); // We have to find the source code location where the user text is inside // the transformed expression code. 
When creating the transformed text, we diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp index 1f44200..e698306 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp @@ -120,7 +120,7 @@ bool ClangUtilityFunction::Install(DiagnosticManager &diagnostic_manager, const bool generate_debug_info = true; ClangExpressionParser parser(exe_ctx.GetBestExecutionContextScope(), *this, - generate_debug_info); + generate_debug_info, diagnostic_manager); unsigned num_errors = parser.Parse(diagnostic_manager); diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp index 33bdd5e..390cf3e 100644 --- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp @@ -144,6 +144,7 @@ llvm::Error ProtocolServerMCP::Stop() { m_server.reset(nullptr); m_server_info_handle.Remove(); + m_listener.reset(); return llvm::Error::success(); } diff --git a/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py b/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py index 0cc505a..ec208f2 100644 --- a/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py +++ b/lldb/test/API/commands/expression/diagnostics/TestExprDiagnostics.py @@ -215,8 +215,22 @@ note: candidate function not viable: requires single argument 'x', but 2 argumen details = diags.GetValueForKey("details") - # Detail 1/2: undeclared 'a' + # Detail 1/3: note: requested expression language diag = details.GetItemAtIndex(0) + self.assertEqual(str(diag.GetValueForKey("severity")), "note") + self.assertEqual( + str(diag.GetValueForKey("message")), "Ran expression as 'C++11'." + ) + self.assertEqual( + str(diag.GetValueForKey("rendered")), "Ran expression as 'C++11'." 
+ ) + self.assertEqual(str(diag.GetValueForKey("source_location")), "") + self.assertEqual(str(diag.GetValueForKey("file")), "") + self.assertFalse(diag.GetValueForKey("hidden").GetBooleanValue()) + self.assertFalse(diag.GetValueForKey("in_user_input").GetBooleanValue()) + + # Detail 2/3: undeclared 'a' + diag = details.GetItemAtIndex(1) severity = diag.GetValueForKey("severity") message = diag.GetValueForKey("message") @@ -234,8 +248,8 @@ note: candidate function not viable: requires single argument 'x', but 2 argumen self.assertFalse(hidden.GetBooleanValue()) self.assertTrue(in_user_input.GetBooleanValue()) - # Detail 1/2: undeclared 'b' - diag = details.GetItemAtIndex(1) + # Detail 3/3: undeclared 'b' + diag = details.GetItemAtIndex(2) message = diag.GetValueForKey("message") self.assertIn("undeclared identifier 'b'", str(message)) diff --git a/lldb/test/API/commands/protocol/TestMCPUnixSocket.py b/lldb/test/API/commands/protocol/TestMCPUnixSocket.py index ea9255c..9edb97e 100644 --- a/lldb/test/API/commands/protocol/TestMCPUnixSocket.py +++ b/lldb/test/API/commands/protocol/TestMCPUnixSocket.py @@ -32,3 +32,16 @@ class MCPUnixSocketCommandTestCase(TestBase): startstr="MCP server started with connection listeners:", substrs=[f"unix-connect://{socket_file}"], ) + + self.expect( + "protocol-server get MCP", + startstr="MCP server connection listeners:", + substrs=[f"unix-connect://{socket_file}"], + ) + + self.runCmd("protocol-server stop MCP", check=False) + self.expect( + "protocol-server get MCP", + error=True, + substrs=["MCP server is not running"], + ) diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py index ebba4d1..10cbd26 100644 --- a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py @@ -28,6 +28,9 @@ class TestCortexMExceptionUnwind(TestBase): core = self.getBuildArtifact("core") self.yaml2macho_core("armv7m-nofpu-exception.yaml", core, exe_uuid) + if self.TraceOn(): + self.runCmd("log enable lldb unwind") + process = target.LoadCore(core) self.assertTrue(process.IsValid()) diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py index 1029bdc..01ed11a 100644 --- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py +++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py @@ -1,4 +1,4 @@ -"""Test the SBCommandInterpreter APIs.""" +"""tESt the SBCommandInterpreter APIs.""" import json import lldb @@ -156,13 +156,15 @@ class CommandInterpreterAPICase(TestBase): self.assertEqual(transcript[0]["error"], "") # (lldb) an-unknown-command - self.assertEqual(transcript[1], + self.assertEqual( + transcript[1], { "command": "an-unknown-command", # Unresolved commands don't have "commandName"/"commandArguments" "output": "", "error": "error: 'an-unknown-command' is not a valid command.\n", - }) + }, + ) # (lldb) br s -f main.c -l <line> self.assertEqual(transcript[2]["command"], "br s -f main.c -l %d" % self.line) @@ -175,14 +177,17 @@ class CommandInterpreterAPICase(TestBase): self.assertEqual(transcript[2]["error"], "") # (lldb) p a - self.assertEqual(transcript[3], + self.assertEqual( + transcript[3], { "command": "p a", "commandName": "dwim-print", "commandArguments": "-- a", "output": "", - "error": "error: 
<user expression 0>:1:1: use of undeclared identifier 'a'\n 1 | a\n | ^\n", - }) + "error": "note: Falling back to default language. Ran expression as 'Objective C++'.\n" + "error: <user expression 0>:1:1: use of undeclared identifier 'a'\n 1 | a\n | ^\n", + }, + ) # (lldb) statistics dump self.assertEqual(transcript[4]["command"], "statistics dump") @@ -203,7 +208,10 @@ class CommandInterpreterAPICase(TestBase): self.assertTrue(ci, VALID_COMMAND_INTERPRETER) # The setting's default value should be "false" - self.runCmd("settings show interpreter.save-transcript", "interpreter.save-transcript (boolean) = false\n") + self.runCmd( + "settings show interpreter.save-transcript", + "interpreter.save-transcript (boolean) = false\n", + ) def test_save_transcript_setting_off(self): ci = self.dbg.GetCommandInterpreter() @@ -250,17 +258,37 @@ class CommandInterpreterAPICase(TestBase): structured_data_1 = ci.GetTranscript() self.assertTrue(structured_data_1.IsValid()) self.assertEqual(structured_data_1.GetSize(), 1) - self.assertEqual(structured_data_1.GetItemAtIndex(0).GetValueForKey("command").GetStringValue(100), "version") + self.assertEqual( + structured_data_1.GetItemAtIndex(0) + .GetValueForKey("command") + .GetStringValue(100), + "version", + ) # Run some more commands and get the transcript as structured data again self.runCmd("help") structured_data_2 = ci.GetTranscript() self.assertTrue(structured_data_2.IsValid()) self.assertEqual(structured_data_2.GetSize(), 2) - self.assertEqual(structured_data_2.GetItemAtIndex(0).GetValueForKey("command").GetStringValue(100), "version") - self.assertEqual(structured_data_2.GetItemAtIndex(1).GetValueForKey("command").GetStringValue(100), "help") + self.assertEqual( + structured_data_2.GetItemAtIndex(0) + .GetValueForKey("command") + .GetStringValue(100), + "version", + ) + self.assertEqual( + structured_data_2.GetItemAtIndex(1) + .GetValueForKey("command") + .GetStringValue(100), + "help", + ) # Now, the first structured data should remain unchanged self.assertTrue(structured_data_1.IsValid()) self.assertEqual(structured_data_1.GetSize(), 1) - self.assertEqual(structured_data_1.GetItemAtIndex(0).GetValueForKey("command").GetStringValue(100), "version") + self.assertEqual( + structured_data_1.GetItemAtIndex(0) + .GetValueForKey("command") + .GetStringValue(100), + "version", + ) diff --git a/lldb/test/Shell/Expr/TestExprLanguageNote.test b/lldb/test/Shell/Expr/TestExprLanguageNote.test new file mode 100644 index 0000000..f3dc592 --- /dev/null +++ b/lldb/test/Shell/Expr/TestExprLanguageNote.test @@ -0,0 +1,87 @@ +# RUN: split-file %s %t +# RUN: %clang_host -g %t/main.cpp -o %t.out +# +# RUN: %lldb -x -b -o "settings set interpreter.stop-command-source-on-error false" \ +# RUN: -s %t/no-target.input 2>&1 | FileCheck %s --check-prefix=CHECK-NO-TARGET +# +# RUN: %lldb %t.out -x -b -o "settings set interpreter.stop-command-source-on-error false" \ +# RUN: -s %t/with-target.input 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET + +#--- main.cpp + +int main() { + int x = 10; + __builtin_debugtrap(); +} + +#--- with-target.input + +expr blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Falling back to default language. Ran expression as 'Objective C++'. + +run + +expr blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Ran expression as 'C++14'. + +expr -l objc -- blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Expression evaluation in pure Objective-C not supported. Ran expression as 'Objective C++'. 
+ +expr -l c -- blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Expression evaluation in pure C not supported. Ran expression as 'ISO C++'. + +expr -l c++14 -- blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Ran expression as 'C++14' + +expr -l c++20 -- blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Ran expression as 'C++20' + +expr -l objective-c++ -- blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Ran expression as 'Objective C++' + +# D uses TypeSystemClang but running expressions in it isn't supported. Test that we warn about this. +expr -l D -- blah + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET: note: Expression evaluation in D not supported. Falling back to default language. Ran expression as 'Objective C++'. + +expr -l c++17 -- x = 5 + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET-NOT: note: + +expr x = 5 + +# CHECK-TARGET: (lldb) expr +# CHECK-TARGET-NOT: note: + +#--- no-target.input + +expr blah + +# CHECK-NO-TARGET: (lldb) expr +# CHECK-NO-TARGET: note: Falling back to default language. Ran expression as 'Objective C++'. + +expr -l c++ -- 1 + 1 + +# CHECK-NO-TARGET: (lldb) expr +# CHECK-NO-TARGET-NOT: note: + +expr 1 + 1 + +# CHECK-NO-TARGET: (lldb) expr +# CHECK-NO-TARGET-NOT: note: diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index c84e2cc..22ecf4d 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -948,6 +948,15 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) # Enable -Wstring-conversion to catch misuse of string literals. if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") append("-Wstring-conversion" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + + # Disable -Wno-pass-failed flag, which reports failure to perform + # optimizations suggested by pragmas. This warning is not relevant for LLVM + # projects and may be injected by pragmas in libstdc++. + # FIXME: Reconsider this choice if warnings from STL headers can be reliably + # avoided (https://github.com/llvm/llvm-project/issues/157666). + # This option has been available since Clang 3.5, and we do require a newer + # version. + append("-Wno-pass-failed" CMAKE_CXX_FLAGS) endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -962,13 +971,6 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) # Enable -Wctad-maybe-unsupported to catch unintended use of CTAD. add_flag_if_supported("-Wctad-maybe-unsupported" CTAD_MAYBE_UNSPPORTED_FLAG) - - # Disable -Wno-pass-failed flag, which reports failure to perform - # optimizations suggested by pragmas. This warning is not relevant for LLVM - # projects and may be injected by pragmas in libstdc++. - # FIXME: Reconsider this choice if warnings from STL headers can be reliably - # avoided (https://github.com/llvm/llvm-project/issues/157666). - add_flag_if_supported("-Wno-pass-failed" NO_PASS_FAILED_FLAG) endif (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) if (LLVM_COMPILER_IS_GCC_COMPATIBLE AND NOT LLVM_ENABLE_WARNINGS) diff --git a/llvm/docs/CallGraphSection.md b/llvm/docs/CallGraphSection.md new file mode 100644 index 0000000..8b18727 --- /dev/null +++ b/llvm/docs/CallGraphSection.md @@ -0,0 +1,18 @@ +# .callgraph Section Layout + +The `.callgraph` section is used to store call graph information for each function. The section contains a series of records, with each record corresponding to a single function. 
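Before the field-by-field table below, a hedged sketch of how a consumer might walk one such record; it assumes a 64-bit, little-endian target, and all names are illustrative rather than an existing LLVM API:

```
// Illustrative stand-alone reader for one .callgraph record (assumes a
// 64-bit, little-endian target; not an LLVM API).
#include <cstdint>
#include <vector>

struct CallGraphRecord {
  uint8_t Version = 0;
  uint8_t Flags = 0;       // bit0: indirect target, bit1: direct callees,
                           // bit2: indirect callee type IDs
  uint64_t EntryPC = 0;
  uint64_t TypeID = 0;     // 0 when unknown or not an indirect target
  std::vector<uint64_t> DirectCalleePCs;
  std::vector<uint64_t> IndirectTargetTypeIDs;
};

static uint64_t readULEB128(const uint8_t *&P) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = *P++;
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return Value;
}

static uint64_t readU64(const uint8_t *&P) {
  uint64_t V = 0;
  for (unsigned I = 0; I < 8; ++I) // little-endian assumption
    V |= uint64_t(P[I]) << (8 * I);
  P += 8;
  return V;
}

CallGraphRecord readRecord(const uint8_t *&P) {
  CallGraphRecord R;
  R.Version = *P++;
  R.Flags = *P++;
  R.EntryPC = readU64(P); // uintptr_t-sized on the target
  R.TypeID = readU64(P);
  if (R.Flags & 0x2) { // direct callees present
    uint64_t N = readULEB128(P);
    for (uint64_t I = 0; I < N; ++I)
      R.DirectCalleePCs.push_back(readU64(P));
  }
  if (R.Flags & 0x4) { // indirect callee type IDs present
    uint64_t N = readULEB128(P);
    for (uint64_t I = 0; I < N; ++I)
      R.IndirectTargetTypeIDs.push_back(readU64(P));
  }
  return R;
}
```

Records are variable length with no index, so a reader walks them sequentially from the start of the section.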
+ +## Per Function Record Layout + +Each record in the `.callgraph` section has the following binary layout: + +| Field | Type | Size (bits) | Description | +| -------------------------------------- | ------------- | ----------- | ------------------------------------------------------------------------------------------------------- | +| Format Version | `uint8_t` | 8 | The version of the record format. The current version is 0. | +| Flags | `uint8_t` | 8 | A bitfield where: Bit 0 is set if the function is a potential indirect call target; Bit 1 is set if there are direct callees; Bit 2 is set if there are indirect callees. The remaining 5 bits are reserved. | +| Function Entry PC | `uintptr_t` | 32/64 | The address of the function's entry point. | +| Function Type ID | `uint64_t` | 64 | The type ID of the function. This field is non-zero if the function is a potential indirect call target and its type is known. | +| Number of Unique Direct Callees | `ULEB128` | Variable | The number of unique direct call destinations from this function. This field is only present if there is at least one direct callee. | +| Direct Callees Array | `uintptr_t[]` | Variable | An array of unique direct callee entry point addresses. This field is only present if there is at least one direct callee. | +| Number of Unique Indirect Target Type IDs| `ULEB128` | Variable | The number of unique indirect call target type IDs. This field is only present if there is at least one indirect target type ID. | +| Indirect Target Type IDs Array | `uint64_t[]` | Variable | An array of unique indirect call target type IDs. This field is only present if there is at least one indirect target type ID. | diff --git a/llvm/docs/CodeGenerator.rst b/llvm/docs/CodeGenerator.rst index 7486054..fc704a3 100644 --- a/llvm/docs/CodeGenerator.rst +++ b/llvm/docs/CodeGenerator.rst @@ -1662,6 +1662,13 @@ and stack sizes (unsigned LEB128). The stack size values only include the space allocated in the function prologue. Functions with dynamic stack allocations are not included. +Emitting function call graph information +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A section containing metadata on function call graph will be emitted when +``TargetOptions::EmitCallGraphSection`` is set (--call-graph-section). Layout of +this section is documented in detail at :doc:`CallGraphSection`. + VLIW Packetizer --------------- diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst index 5d842d3..9b1bf1b 100644 --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -15,6 +15,7 @@ LLVM and API reference documentation. 
BranchWeightMetadata Bugpoint CalleeTypeMetadata + CallGraphSection CIBestPractices CommandGuide/index ContentAddressableStorage diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index 310539f..017585a4 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -21,9 +21,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Signals.h" #include <cassert> #include <cstdint> #include <functional> diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 815e85d..5039a3f 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -390,7 +390,7 @@ toDW_LNAME(SourceLanguage language) { case DW_LANG_C11: return {{DW_LNAME_C, 201112}}; case DW_LANG_C17: - return {{DW_LNAME_C, 201712}}; + return {{DW_LNAME_C, 201710}}; case DW_LANG_C_plus_plus: return {{DW_LNAME_C_plus_plus, 0}}; case DW_LANG_C_plus_plus_03: diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 4c744a2..19ca444 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -198,26 +198,13 @@ private: /// and targets. using CGTypeId = uint64_t; - /// Map type identifiers to callsite labels. Labels are generated for each - /// indirect callsite in the function. - SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels; + /// Unique target type IDs. + SmallSet<CGTypeId, 4> IndirectCalleeTypeIDs; + /// Unique direct callees. SmallSet<MCSymbol *, 4> DirectCallees; }; - /// Enumeration of function kinds, and their mapping to function kind values - /// stored in callgraph section entries. - enum class FunctionKind : uint64_t { - /// Function cannot be target to indirect calls. - NOT_INDIRECT_TARGET = 0, - - /// Function may be target to indirect calls but its type id is unknown. - INDIRECT_TARGET_UNKNOWN_TID = 1, - - /// Function may be target to indirect calls and its type id is known. - INDIRECT_TARGET_KNOWN_TID = 2, - }; - - enum CallGraphSectionFormatVersion : uint64_t { + enum CallGraphSectionFormatVersion : uint8_t { V_0 = 0, }; @@ -386,9 +373,9 @@ public: /// are available. Returns empty string otherwise. StringRef getConstantSectionSuffix(const Constant *C) const; - /// Iff MI is an indirect call, generate and emit a label after the callsites - /// which will be used to populate the .callgraph section. For direct - /// callsites add the callee symbol to direct callsites list of FuncCGInfo. + /// If MI is an indirect call, add expected type IDs to indirect type ids + /// list. If MI is a direct call add the callee symbol to direct callsites + /// list of FuncCGInfo. void handleCallsiteForCallgraph( FunctionCallGraphInfo &FuncCGInfo, const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index 4a54caa..face5da 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -206,6 +206,16 @@ public: /// Calculate range of negated values. LLVM_ABI ConstantFPRange negate() const; + + /// Get the range without NaNs. It is useful when we apply nnan flag to range + /// of operands/results. 
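As motivated by the comment above, a hedged usage sketch of tightening an operand range under fast-math flags. `ConstantFPRange` and `FastMathFlags` are the real LLVM types; `applyFastMathFlags` is a hypothetical helper written only for illustration:

```
// Hypothetical helper (illustration only): drop the values that nnan/ninf
// promise cannot occur from an operand or result range.
#include "llvm/IR/ConstantFPRange.h"
#include "llvm/IR/FMF.h"
using namespace llvm;

ConstantFPRange applyFastMathFlags(ConstantFPRange R, FastMathFlags FMF) {
  if (FMF.noNaNs())
    R = R.getWithoutNaN(); // strip the NaN component
  if (FMF.noInfs())
    R = R.getWithoutInf(); // clamp infinite endpoints to the largest finites
  return R;
}
```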
+ ConstantFPRange getWithoutNaN() const { + return ConstantFPRange(Lower, Upper, false, false); + } + + /// Get the range without infinities. It is useful when we apply ninf flag to + /// range of operands/results. + LLVM_ABI ConstantFPRange getWithoutInf() const; }; inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) { diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index de7a237..27930bb 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1715,7 +1715,7 @@ public: static SelectInst *Create(Value *C, Value *S1, Value *S2, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr, - Instruction *MDFrom = nullptr) { + const Instruction *MDFrom = nullptr) { SelectInst *Sel = new (AllocMarker) SelectInst(C, S1, S2, NameStr, InsertBefore); if (MDFrom) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 774063b..632be7a 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -119,11 +119,12 @@ def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 2> ]>; def SDTIntShiftPairOp : SDTypeProfile<2, 3, [ // shl_parts, sra_parts, srl_parts - SDTCisInt<0>, SDTCisSameAs<1, 0>, - SDTCisSameAs<2, 0>, SDTCisSameAs<3, 0>, SDTCisInt<4> + SDTCisInt<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisSameAs<3, 0>, + SDTCisInt<4>, SDTCisSameNumEltsAs<0, 4> ]>; def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr - SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>, + SDTCisSameNumEltsAs<0, 3> ]>; def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift SDTCisSameAs<0, 1>, SDTCisInt<2> @@ -139,7 +140,7 @@ def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> ]>; def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign. - SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisFP<2> + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisFP<2>, SDTCisSameNumEltsAs<0, 2> ]>; def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. 
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> @@ -148,7 +149,7 @@ def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz - SDTCisInt<0>, SDTCisInt<1> + SDTCisInt<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> @@ -300,10 +301,10 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction - SDTCisInt<0>, SDTCisVec<1> + SDTCisInt<0>, SDTCisVec<1>, SDTCisInt<1> ]>; def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction - SDTCisFP<0>, SDTCisVec<1> + SDTCisFP<0>, SDTCisEltOfVec<0, 1> ]>; def SDTVecReverse : SDTypeProfile<1, 1, [ // vector reverse diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index ed2e01c..dc8cd86d 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -277,6 +277,7 @@ public: MuslF32, MuslSF, MuslX32, + MuslWALI, LLVM, MSVC, @@ -798,6 +799,12 @@ public: return getObjectFormat() == Triple::DXContainer; } + /// Tests whether the target uses WALI Wasm + bool isWALI() const { + return getArch() == Triple::wasm32 && isOSLinux() && + getEnvironment() == Triple::MuslWALI; + } + /// Tests whether the target is the PS4 platform. bool isPS4() const { return getArch() == Triple::x86_64 && @@ -840,6 +847,7 @@ public: getEnvironment() == Triple::MuslF32 || getEnvironment() == Triple::MuslSF || getEnvironment() == Triple::MuslX32 || + getEnvironment() == Triple::MuslWALI || getEnvironment() == Triple::OpenHOS || isOSLiteOS(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 10df9c1..219bbc9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -20,6 +20,7 @@ #include "WinException.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -205,6 +206,17 @@ public: }; } // namespace +namespace callgraph { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +enum Flags : uint8_t { + None = 0, + IsIndirectTarget = 1u << 0, + HasDirectCallees = 1u << 1, + HasIndirectCallees = 1u << 2, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ HasIndirectCallees) +}; +} // namespace callgraph + class llvm::AddrLabelMap { MCContext &Context; struct AddrLabelSymEntry { @@ -1683,63 +1695,65 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, OutStreamer->pushSection(); OutStreamer->switchSection(FuncCGSection); - // Emit format version number. - OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0); - - // Emit function's self information, which is composed of: - // 1) FunctionEntryPc - // 2) FunctionKind: Whether the function is indirect target, and if so, - // whether its type id is known. - // 3) FunctionTypeId: Emit only when the function is an indirect target - // and its type id is known. - - // Emit function entry pc. 
const MCSymbol *FunctionSymbol = getFunctionBegin(); - OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); - + const Function &F = MF.getFunction(); // If this function has external linkage or has its address taken and // it is not a callback, then anything could call it. - const Function &F = MF.getFunction(); bool IsIndirectTarget = !F.hasLocalLinkage() || F.hasAddressTaken(nullptr, /*IgnoreCallbackUses=*/true, /*IgnoreAssumeLikeCalls=*/true, /*IgnoreLLVMUsed=*/false); - // FIXME: FunctionKind takes a few values but emitted as a 64-bit value. - // Can be optimized to occupy 2 bits instead. - // Emit function kind, and type id if available. - if (!IsIndirectTarget) { - OutStreamer->emitInt64( - static_cast<uint64_t>(FunctionKind::NOT_INDIRECT_TARGET)); - } else { - if (const auto *TypeId = extractNumericCGTypeId(F)) { - OutStreamer->emitInt64( - static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_KNOWN_TID)); - OutStreamer->emitInt64(TypeId->getZExtValue()); - } else { - OutStreamer->emitInt64( - static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_UNKNOWN_TID)); - } - } + const auto &DirectCallees = FuncCGInfo.DirectCallees; + const auto &IndirectCalleeTypeIDs = FuncCGInfo.IndirectCalleeTypeIDs; + + using namespace callgraph; + Flags CGFlags = Flags::None; + if (IsIndirectTarget) + CGFlags |= Flags::IsIndirectTarget; + if (DirectCallees.size() > 0) + CGFlags |= Flags::HasDirectCallees; + if (IndirectCalleeTypeIDs.size() > 0) + CGFlags |= Flags::HasIndirectCallees; + + // Emit function's call graph information. + // 1) CallGraphSectionFormatVersion + // 2) Flags + // a. LSB bit 0 is set to 1 if the function is a potential indirect + // target. + // b. LSB bit 1 is set to 1 if there are direct callees. + // c. LSB bit 2 is set to 1 if there are indirect callees. + // d. Rest of the 5 bits in Flags are reserved for any future use. + // 3) Function entry PC. + // 4) FunctionTypeID if the function is indirect target and its type id + // is known, otherwise it is set to 0. + // 5) Number of unique direct callees, if at least one exists. + // 6) For each unique direct callee, the callee's PC. + // 7) Number of unique indirect target type IDs, if at least one exists. + // 8) Each unique indirect target type id. + OutStreamer->emitInt8(CallGraphSectionFormatVersion::V_0); + OutStreamer->emitInt8(static_cast<uint8_t>(CGFlags)); + OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + const auto *TypeId = extractNumericCGTypeId(F); + if (IsIndirectTarget && TypeId) + OutStreamer->emitInt64(TypeId->getZExtValue()); + else + OutStreamer->emitInt64(0); - // Emit callsite labels, where each element is a pair of type id and - // indirect callsite pc. 
- const auto &CallSiteLabels = FuncCGInfo.CallSiteLabels; - OutStreamer->emitInt64(CallSiteLabels.size()); - for (const auto &[TypeId, Label] : CallSiteLabels) { - OutStreamer->emitInt64(TypeId); - OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize()); + if (DirectCallees.size() > 0) { + OutStreamer->emitULEB128IntValue(DirectCallees.size()); + for (const auto &CalleeSymbol : DirectCallees) + OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize()); + FuncCGInfo.DirectCallees.clear(); } - FuncCGInfo.CallSiteLabels.clear(); - - const auto &DirectCallees = FuncCGInfo.DirectCallees; - OutStreamer->emitInt64(DirectCallees.size()); - for (const auto &CalleeSymbol : DirectCallees) { - OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize()); + if (IndirectCalleeTypeIDs.size() > 0) { + OutStreamer->emitULEB128IntValue(IndirectCalleeTypeIDs.size()); + for (const auto &CalleeTypeId : IndirectCalleeTypeIDs) + OutStreamer->emitInt64(CalleeTypeId); + FuncCGInfo.IndirectCalleeTypeIDs.clear(); } - FuncCGInfo.DirectCallees.clear(); - + // End of emitting call graph section contents. OutStreamer->popSection(); } @@ -1877,8 +1891,7 @@ void AsmPrinter::handleCallsiteForCallgraph( FunctionCallGraphInfo &FuncCGInfo, const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, const MachineInstr &MI) { - assert(MI.isCall() && - "Callsite labels are meant for call instructions only."); + assert(MI.isCall() && "This method is meant for call instructions only."); const MachineOperand &CalleeOperand = MI.getOperand(0); if (CalleeOperand.isGlobal() || CalleeOperand.isSymbol()) { // Handle direct calls. @@ -1903,10 +1916,8 @@ void AsmPrinter::handleCallsiteForCallgraph( // Handle indirect callsite info. // Only indirect calls have type identifiers set. 
for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) { - MCSymbol *S = MF->getContext().createTempSymbol(); - OutStreamer->emitLabel(S); uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue(); - FuncCGInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S); + FuncCGInfo.IndirectCalleeTypeIDs.insert(CalleeTypeIdVal); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 12d749c..e57ed24 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -569,40 +569,30 @@ void CodeViewDebug::emitCodeViewMagicVersion() { OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } -static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { - switch (DWLang) { - case dwarf::DW_LANG_C: - case dwarf::DW_LANG_C89: - case dwarf::DW_LANG_C99: - case dwarf::DW_LANG_C11: +static SourceLanguage +MapDWARFLanguageToCVLang(dwarf::SourceLanguageName DWLName) { + switch (DWLName) { + case dwarf::DW_LNAME_C: return SourceLanguage::C; - case dwarf::DW_LANG_C_plus_plus: - case dwarf::DW_LANG_C_plus_plus_03: - case dwarf::DW_LANG_C_plus_plus_11: - case dwarf::DW_LANG_C_plus_plus_14: + case dwarf::DW_LNAME_C_plus_plus: return SourceLanguage::Cpp; - case dwarf::DW_LANG_Fortran77: - case dwarf::DW_LANG_Fortran90: - case dwarf::DW_LANG_Fortran95: - case dwarf::DW_LANG_Fortran03: - case dwarf::DW_LANG_Fortran08: + case dwarf::DW_LNAME_Fortran: return SourceLanguage::Fortran; - case dwarf::DW_LANG_Pascal83: + case dwarf::DW_LNAME_Pascal: return SourceLanguage::Pascal; - case dwarf::DW_LANG_Cobol74: - case dwarf::DW_LANG_Cobol85: + case dwarf::DW_LNAME_Cobol: return SourceLanguage::Cobol; - case dwarf::DW_LANG_Java: + case dwarf::DW_LNAME_Java: return SourceLanguage::Java; - case dwarf::DW_LANG_D: + case dwarf::DW_LNAME_D: return SourceLanguage::D; - case dwarf::DW_LANG_Swift: + case dwarf::DW_LNAME_Swift: return SourceLanguage::Swift; - case dwarf::DW_LANG_Rust: + case dwarf::DW_LNAME_Rust: return SourceLanguage::Rust; - case dwarf::DW_LANG_ObjC: + case dwarf::DW_LNAME_ObjC: return SourceLanguage::ObjC; - case dwarf::DW_LANG_ObjC_plus_plus: + case dwarf::DW_LNAME_ObjC_plus_plus: return SourceLanguage::ObjCpp; default: // There's no CodeView representation for this language, and CV doesn't @@ -612,6 +602,14 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { } } +static SourceLanguage MapDWARFLanguageToCVLang(dwarf::SourceLanguage DWLang) { + auto MaybeLName = dwarf::toDW_LNAME(DWLang); + if (!MaybeLName) + return MapDWARFLanguageToCVLang(static_cast<dwarf::SourceLanguageName>(0)); + + return MapDWARFLanguageToCVLang(MaybeLName->first); +} + void CodeViewDebug::beginModule(Module *M) { // If COFF debug section is not available, skip any debug info related stuff. if (!Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { @@ -633,8 +631,13 @@ void CodeViewDebug::beginModule(Module *M) { Node = *CUs->operands().begin(); } const auto *CU = cast<DICompileUnit>(Node); + DISourceLanguageName Lang = CU->getSourceLanguage(); CurrentSourceLanguage = - MapDWLangToCVLang(CU->getSourceLanguage().getUnversionedName()); + Lang.hasVersionedName() + ? 
MapDWARFLanguageToCVLang( + static_cast<dwarf::SourceLanguageName>(Lang.getName())) + : MapDWARFLanguageToCVLang( + static_cast<dwarf::SourceLanguage>(Lang.getName())); if (!M->getCodeViewFlag() || CU->getEmissionKind() == DICompileUnit::NoDebug) { Asm = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d751a7f..433877f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1039,8 +1039,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, } else NewCU.addString(Die, dwarf::DW_AT_producer, Producer); - NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit->getSourceLanguage().getUnversionedName()); + if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName()) + NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2, + Lang.getName()); + else + NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + Lang.getName()); NewCU.addString(Die, dwarf::DW_AT_name, FN); StringRef SysRoot = DIUnit->getSysRoot(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ffdc4e..b47274b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18870,27 +18870,38 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { - // We only do this if the target has legal ftrunc. Otherwise, we'd likely be - // replacing casts with a libcall. We also must be allowed to ignore -0.0 - // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer - // conversions would return +0.0. + // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc. + // If NoSignedZerosFPMath is enabled, this is a direct replacement. + // Otherwise, for strict math, we must handle edge cases: + // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0 + // as example, it first becomes integer 0, and is converted back to +0.0. + // FTRUNC on its own could produce -0.0. + // FIXME: We should be able to use node-level FMF here. - // TODO: If strict math, should we use FABS (+ range check for signed cast)? EVT VT = N->getValueType(0); - if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || - !DAG.getTarget().Options.NoSignedZerosFPMath) + if (!TLI.isOperationLegal(ISD::FTRUNC, VT)) return SDValue(); // fptosi/fptoui round towards zero, so converting from FP to integer and // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X SDValue N0 = N->getOperand(0); if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && - N0.getOperand(0).getValueType() == VT) - return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + N0.getOperand(0).getValueType() == VT) { + if (DAG.getTarget().Options.NoSignedZerosFPMath) + return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + } if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && - N0.getOperand(0).getValueType() == VT) - return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + N0.getOperand(0).getValueType() == VT) { + if (DAG.getTarget().Options.NoSignedZerosFPMath) + return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + + // Strict math: use FABS to handle negative inputs correctly. 
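The `-0.0` corner case behind the FABS guard above can be checked in plain C++ (this snippet is illustrative only, not compiler code):

```
// For x = -0.3f: the fptoui/uitofp round trip produces +0.0, a bare ftrunc
// produces -0.0, and fabs+ftrunc matches the round trip again.
#include <cmath>
#include <cstdio>

int main() {
  float X = -0.3f;
  float RoundTrip = (float)(unsigned)X;     // +0.0 (truncates to 0 first)
  float Trunc = std::trunc(X);              // -0.0
  float Guarded = std::trunc(std::fabs(X)); // +0.0
  std::printf("signbits: %d %d %d\n", (int)std::signbit(RoundTrip),
              (int)std::signbit(Trunc), (int)std::signbit(Guarded)); // 0 1 0
  return 0;
}
```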
+ if (TLI.isFAbsFree(VT)) { + SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::FTRUNC, DL, VT, Abs); + } + } return SDValue(); } diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index fba6942..2477e22 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -411,3 +411,17 @@ ConstantFPRange ConstantFPRange::abs() const { ConstantFPRange ConstantFPRange::negate() const { return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN); } + +ConstantFPRange ConstantFPRange::getWithoutInf() const { + if (isNaNOnly()) + return *this; + APFloat NewLower = Lower; + APFloat NewUpper = Upper; + if (Lower.isNegInfinity()) + NewLower = APFloat::getLargest(getSemantics(), /*Negative=*/true); + if (Upper.isPosInfinity()) + NewUpper = APFloat::getLargest(getSemantics(), /*Negative=*/false); + canonicalizeRange(NewLower, NewUpper); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN, + MayBeSNaN); +} diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp index c5a2ec2..7a8c8ad 100644 --- a/llvm/lib/Object/XCOFFObjectFile.cpp +++ b/llvm/lib/Object/XCOFFObjectFile.cpp @@ -379,7 +379,7 @@ Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const { } uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { - // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t + // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t // with MSVC. if (is64Bit()) return toSection64(Sec)->VirtualAddress; @@ -397,7 +397,7 @@ uint64_t XCOFFObjectFile::getSectionIndex(DataRefImpl Sec) const { } uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const { - // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t + // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t // with MSVC. 
if (is64Bit()) return toSection64(Sec)->SectionSize; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 7392f4b..bfe2c80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -633,6 +633,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}}) .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}}); + addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard) + .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}}) + .Div(S32, {{Vgpr32}, {Vgpr32, Imm}}) + .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}}) + .Div(S64, {{Vgpr64}, {Vgpr64, Imm}}); + bool hasSMRDx3 = ST->hasScalarDwordx3Loads(); bool hasSMRDSmall = ST->hasScalarSubwordLoads(); bool usesTrue16 = ST->useRealTrue16Insts(); diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 4d3331a..c684f9e 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -674,15 +674,9 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/true); MachineOperand HiDstOp = Op0HOp1H->getOperand(0); - if (I.getFlag(MachineInstr::MIFlag::NoFPExcept)) { - Op0LOp1L->setFlag(MachineInstr::MIFlag::NoFPExcept); - Op0HOp1H->setFlag(MachineInstr::MIFlag::NoFPExcept); - } - if (I.getFlag(MachineInstr::MIFlag::FmContract)) { - Op0LOp1L->setFlag(MachineInstr::MIFlag::FmContract); - Op0HOp1H->setFlag(MachineInstr::MIFlag::FmContract); - } - + uint32_t IFlags = I.getFlags(); + Op0LOp1L->setFlags(IFlags); + Op0HOp1H->setFlags(IFlags); LoDstOp.setIsRenamable(DstOp.isRenamable()); HiDstOp.setIsRenamable(DstOp.isRenamable()); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 2a40fb9..83c7def 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -42,7 +42,6 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ComplexDeinterleavingPass.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index e8c849e..28a1690 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -46,7 +46,6 @@ #include "SPIRVSubtarget.h" #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp index 20f03b0..60d39c9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp index 
278ad7c..e621bcd44 100644 --- a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp @@ -14,7 +14,6 @@ #include "SPIRV.h" #include "SPIRVSubtarget.h" #include "SPIRVUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Transforms/Utils/Cloning.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp index 1811492..5b149f8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9580ade..1cfcb1f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/VectorUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index ac3626d..f021094 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -375,6 +375,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case MuslSF: return "muslsf"; case MuslX32: return "muslx32"; + case MuslWALI: + return "muslwali"; case Simulator: return "simulator"; case Pixel: return "pixel"; case Vertex: return "vertex"; @@ -767,6 +769,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("muslf32", Triple::MuslF32) .StartsWith("muslsf", Triple::MuslSF) .StartsWith("muslx32", Triple::MuslX32) + .StartsWith("muslwali", Triple::MuslWALI) .StartsWith("musl", Triple::Musl) .StartsWith("msvc", Triple::MSVC) .StartsWith("itanium", Triple::Itanium) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 59e103cd..73ec451 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // zext(bool) + C -> bool ? C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1); + return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C), + Op1); // sext(bool) + C -> bool ? 
C - 1 : C if (match(Op0, m_SExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1); + return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C), + Op1); // ~X + C --> (C-1) - X if (match(Op0, m_Not(m_Value(X)))) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 218aaf9..7071876 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -471,18 +471,19 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); - SelectInst *createSelectInst(Value *C, Value *S1, Value *S2, - const Twine &NameStr = "", - InsertPosition InsertBefore = nullptr, - Instruction *MDFrom = nullptr) { - SelectInst *SI = - SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom); - if (!MDFrom) - setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE); - return SI; +public: + /// Create `select C, S1, S2`. Use only when the profile cannot be calculated + /// from existing profile metadata: if the Function has profiles, this will + /// set the profile of this select to "unknown". + SelectInst * + createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2, + const Twine &NameStr = "", + InsertPosition InsertBefore = nullptr) { + auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr); + setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE); + return Sel; } -public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. void CreateNonTerminatorUnreachable(Instruction *InsertAt) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index d457e0c..899a3c1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1); - return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty)); + return createSelectInstWithUnknownProfile(X, NewC, + ConstantInt::getNullValue(Ty)); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 127a506..63e24a0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -107,7 +108,10 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I, Value *ShrAmtZ = IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()), ShrAmt->getName() + ".z"); - Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper); + // There is no existing !prof metadata we can derive the !prof metadata for + // this select. 
+ Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper); + IC.Builder.Insert(Select); Select->takeName(I); return Select; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d56a1af..82ac903 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { Constant *Zero = ConstantInt::getNullValue(BO.getType()); Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); - return createSelectInst(X, TVal, FVal); + return createSelectInstWithUnknownProfile(X, TVal, FVal); } static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e5935f4..ff5f390 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -386,22 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } -/// Helper to get the successor corresponding to a particular case value for -/// a switch statement. -static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, - const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; -} - namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with @@ -835,19 +819,32 @@ private: TPaths = std::move(TempList); } + /// Fast helper to get the successor corresponding to a particular case value + /// for a switch statement. + BasicBlock *getNextCaseSuccessor(const APInt &NextState) { + // Precompute the value => successor mapping + if (CaseValToDest.empty()) { + for (auto Case : Switch->cases()) { + APInt CaseVal = Case.getCaseValue()->getValue(); + CaseValToDest[CaseVal] = Case.getCaseSuccessor(); + } + } + + auto SuccIt = CaseValToDest.find(NextState); + return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest() + : SuccIt->second; + } + // Two states are equivalent if they have the same switch destination. // Unify the states in different threading path if the states are equivalent. 
void unifyTPaths() { - llvm::SmallDenseMap<BasicBlock *, APInt> DestToState; + SmallDenseMap<BasicBlock *, APInt> DestToState; for (ThreadingPath &Path : TPaths) { APInt NextState = Path.getExitValue(); - BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState); - auto StateIt = DestToState.find(Dest); - if (StateIt == DestToState.end()) { - DestToState.insert({Dest, NextState}); + BasicBlock *Dest = getNextCaseSuccessor(NextState); + auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState); + if (Inserted) continue; - } - if (NextState != StateIt->second) { LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " << StateIt->second << "\n"); @@ -861,6 +858,7 @@ private: BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; std::vector<ThreadingPath> TPaths; + DenseMap<APInt, BasicBlock *> CaseValToDest; LoopInfo *LI; Loop *SwitchOuterLoop; }; @@ -1162,6 +1160,24 @@ private: SSAUpdate.RewriteAllUses(&DTU->getDomTree()); } + /// Helper to get the successor corresponding to a particular case value for + /// a switch statement. + /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch) + /// by updating cached value => successor mapping during threading. + static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; + } + /// Clones a basic block, and adds it to the CFG. /// /// This function also includes updating phi nodes in the successors of the diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 3a8ade8..42db424 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2156,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) { if (!L->isUnordered()) return false; + if (L->getType()->isTokenLikeTy()) + return false; + if (L->use_empty()) { salvageAndRemoveInstruction(L); return true; diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 3ae570c..4f1ff7b 100644 --- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -void nameInstructions(Function &F) { - for (auto &Arg : F.args()) { +static void nameInstructions(Function &F) { + for (Argument &Arg : F.args()) { if (!Arg.hasName()) Arg.setName("arg"); } @@ -38,8 +37,6 @@ void nameInstructions(Function &F) { } } -} // namespace - PreservedAnalyses InstructionNamerPass::run(Function &F, FunctionAnalysisManager &FAM) { nameInstructions(F); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cfa8d27..2388375 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2245,6 +2245,26 @@ public: Align Alignment, const int64_t Diff, Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const; + /// Return true if an array of scalar loads can be replaced with a strided + /// load (with run-time stride). + /// \param PointerOps list of pointer arguments of loads. + /// \param ScalarTy type of loads. + /// \param CommonAlignment common alignement of loads as computed by + /// `computeCommonAlignment<LoadInst>`. 
+ /// \param SortedIndicies is a list of indicies computed by this function such + /// that the sequence `PointerOps[SortedIndices[0]], + /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is + /// ordered by the coefficient of the stride. For example, if PointerOps is + /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be + /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be + /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`. + /// \param SPtrInfo If the function return `true`, it also sets all the fields + /// of `SPtrInfo` necessary to generate the strided load later. + bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy, + Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const; + /// Checks if the given array of loads can be represented as a vectorized, /// scatter or just simple gather. /// \param VL list of loads. @@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, return false; } +bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, + Type *ScalarTy, Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const { + const unsigned Sz = PointerOps.size(); + FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz); + if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) || + !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)) + return false; + if (const SCEV *Stride = + calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) { + SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); + SPtrInfo.StrideSCEV = Stride; + return true; + } + return false; +} + BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order, SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo, @@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( auto *VecTy = getWidenedType(ScalarTy, Sz); Align CommonAlignment = computeCommonAlignment<LoadInst>(VL); if (!IsSorted) { - if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) { - if (const SCEV *Stride = - calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order); - Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) { - SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); - SPtrInfo.StrideSCEV = Stride; - return LoadsState::StridedVectorize; - } - } + if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order, + SPtrInfo)) + return LoadsState::StridedVectorize; if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) || TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) @@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis { void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) { BasicBlock *Parent = nullptr; // Checks if the instruction has supported opcode. - auto IsSupportedInstruction = [&](Instruction *I) { + auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) { + if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I))) + return false; return I && isSupportedOpcode(I->getOpcode()) && (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I)); }; @@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis { // will be unable to schedule anyway. 
SmallDenseSet<Value *, 8> Operands; SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates; + bool AnyUndef = false; for (Value *V : VL) { auto *I = dyn_cast<Instruction>(V); - if (!I) + if (!I) { + AnyUndef |= isa<UndefValue>(V); continue; + } if (!DT.isReachableFromEntry(I->getParent())) continue; if (Candidates.empty()) { @@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis { if (P.second.size() < BestOpcodeNum) continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { MainOp = I; BestOpcodeNum = P.second.size(); break; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll index f4ae66a..4ad5b38 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -70,6 +70,12 @@ define half @t3(half %x) { ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; +; USE-NEON-NO-GPRS-LABEL: t3: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvtzs h0, h0 +; USE-NEON-NO-GPRS-NEXT: scvtf h0, h0 +; USE-NEON-NO-GPRS-NEXT: ret +; ; NONEON-NOSVE-LABEL: t3: ; NONEON-NOSVE: // %bb.0: // %entry ; NONEON-NOSVE-NEXT: fcvt s0, h0 @@ -147,6 +153,12 @@ define half @t6(half %x) { ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; +; USE-NEON-NO-GPRS-LABEL: t6: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvtzu h0, h0 +; USE-NEON-NO-GPRS-NEXT: ucvtf h0, h0 +; USE-NEON-NO-GPRS-NEXT: ret +; ; NONEON-NOSVE-LABEL: t6: ; NONEON-NOSVE: // %bb.0: // %entry ; NONEON-NOSVE-NEXT: fcvt s0, h0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll index c92e5c5..edb3607 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -stop-after=irtranslator -o - %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji -stop-after=irtranslator -o - %s | FileCheck %s declare align(8) dereferenceable(8) ptr @declared_with_ret_deref() #0 declare align(8) ptr @unknown_decl() #0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index e6a8bac..2356dad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -mattr=-promote-alloca < %s | FileCheck -check-prefix=GFX942 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-promote-alloca < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-promote-alloca < %s | 
FileCheck -check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -new-reg-bank-select -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -new-reg-bank-select -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -new-reg-bank-select -mattr=-promote-alloca < %s | FileCheck -check-prefix=GFX942 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -new-reg-bank-select -mattr=-promote-alloca < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -new-reg-bank-select -mattr=-promote-alloca < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=UNALIGNED_GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=UNALIGNED_GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca < %s | FileCheck -check-prefixes=UNALIGNED_GFX942 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca < %s | FileCheck -check-prefixes=UNALIGNED_GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca < %s | FileCheck -check-prefixes=UNALIGNED_GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -new-reg-bank-select -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=UNALIGNED_GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -new-reg-bank-select -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=UNALIGNED_GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -new-reg-bank-select -mattr=-unaligned-access-mode -mattr=-promote-alloca < %s | FileCheck -check-prefixes=UNALIGNED_GFX942 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -new-reg-bank-select -mattr=-unaligned-access-mode -mattr=-promote-alloca < %s | FileCheck -check-prefixes=UNALIGNED_GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -new-reg-bank-select -mattr=-unaligned-access-mode -mattr=-promote-alloca < %s | FileCheck -check-prefixes=UNALIGNED_GFX12 %s define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX9-LABEL: store_load_sindex_kernel: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll index 920d8fa..ae7f6ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck 
-check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addrspace(1) %ptr.out) #0 { ; GCN-LABEL: v_insert_v64i32_37: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll index cfbb429..aabf256 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll @@ -1,11 +1,11 @@ -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1011 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s ; GCN-LABEL: test_local_misaligned_v2: ; GCN-DAG: ds_{{read2|load_2addr}}_b32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index 66cdfc2..7b923f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -1,14 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -o %t.v4.ll ; RUN: sed 
's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -o %t.v6.ll -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-amdhsa < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-amdhsa < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-sext.mir new file mode 100644 index 0000000..87836e2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-sext.mir @@ -0,0 +1,170 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: assert_sext_vgpr +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; 
CHECK-LABEL: name: assert_sext_vgpr + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: %assert_sext:vgpr(s32) = G_ASSERT_SEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s32) + %copy:_(s32) = COPY $vgpr0 + %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... + +--- +name: assert_sext_sgpr +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr8 + + ; CHECK-LABEL: name: assert_sext_sgpr + ; CHECK: liveins: $sgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:sgpr(s32) = COPY $sgpr8 + ; CHECK-NEXT: %assert_sext:sgpr(s32) = G_ASSERT_SEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s32) + %copy:_(s32) = COPY $sgpr8 + %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... + +--- +name: assert_sext_agpr +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0 + + ; CHECK-LABEL: name: assert_sext_agpr + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: %assert_sext:vgpr(s32) = G_ASSERT_SEXT %copy, 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s32) + %copy:_(s32) = COPY $agpr0 + %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... + +--- +name: assert_sext_vgpr_regclass +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: assert_sext_vgpr_regclass + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY %copy(s32) + ; CHECK-NEXT: %assert_sext:vgpr(s32) = G_ASSERT_SEXT [[COPY]], 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s32) + %copy:vgpr_32(s32) = COPY $vgpr0 + %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... + +--- +name: assert_sext_sgpr_regcllass +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr8 + + ; CHECK-LABEL: name: assert_sext_sgpr_regcllass + ; CHECK: liveins: $sgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:sgpr_32(s32) = COPY $sgpr8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY %copy(s32) + ; CHECK-NEXT: %assert_sext:sgpr(s32) = G_ASSERT_SEXT [[COPY]], 4 + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s32) + %copy:sgpr_32(s32) = COPY $sgpr8 + %assert_sext:_(s32) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... + +--- +name: assert_sext_vgpr_64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: assert_sext_vgpr_64 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:vreg_64(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY %copy(s64) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:vgpr(s64) = G_ASSERT_SEXT [[COPY]], 4 + ; CHECK-NEXT: %assert_sext:vreg_64(s64) = COPY [[ASSERT_SEXT]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s64) + %copy:vreg_64(s64) = COPY $vgpr0_vgpr1 + %assert_sext:vreg_64(s64) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... 
+ +--- +name: assert_sext_sgpr_64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: assert_sext_sgpr_64 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY %copy(s64) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:sgpr(s64) = G_ASSERT_SEXT [[COPY]], 4 + ; CHECK-NEXT: %assert_sext:sreg_64(s64) = COPY [[ASSERT_SEXT]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s64) + %copy:sreg_64(s64) = COPY $sgpr0_sgpr1 + %assert_sext:sreg_64(s64) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... + +--- +name: assert_sext_agpr_64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1 + + ; CHECK-LABEL: name: assert_sext_agpr_64 + ; CHECK: liveins: $agpr0_agpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:areg_64(s64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY %copy(s64) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:vgpr(s64) = G_ASSERT_SEXT [[COPY]], 4 + ; CHECK-NEXT: %assert_sext:areg_64(s64) = COPY [[ASSERT_SEXT]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_sext(s64) + %copy:areg_64(s64) = COPY $agpr0_agpr1 + %assert_sext:areg_64(s64) = G_ASSERT_SEXT %copy, 4 + S_ENDPGM 0, implicit %assert_sext +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir index 0bce908..c64a8ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s --- name: assert_zext_vgpr @@ -53,8 +53,8 @@ body: | ; CHECK-LABEL: name: assert_zext_agpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %copy:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: %assert_zext:agpr(s32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: %copy:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT %copy, 4 ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) %copy:_(s32) = COPY $agpr0 %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 @@ -74,7 +74,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY %copy(s32) + ; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT [[COPY]], 4 ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) %copy:vgpr_32(s32) = COPY $vgpr0 %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 @@ -94,9 +95,76 @@ body: | ; CHECK: liveins: $sgpr8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:sgpr_32(s32) = COPY $sgpr8 - ; CHECK-NEXT: %assert_zext:sgpr(s32) = G_ASSERT_ZEXT %copy, 4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY %copy(s32) + ; CHECK-NEXT: %assert_zext:sgpr(s32) = G_ASSERT_ZEXT [[COPY]], 4 ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32) %copy:sgpr_32(s32) = COPY $sgpr8 %assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4 S_ENDPGM 0, implicit %assert_zext ... 
+ +--- +name: assert_zext_vgpr_64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: assert_zext_vgpr_64 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:vreg_64(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY %copy(s64) + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:vgpr(s64) = G_ASSERT_ZEXT [[COPY]], 4 + ; CHECK-NEXT: %assert_zext:vreg_64(s64) = COPY [[ASSERT_ZEXT]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s64) + %copy:vreg_64(s64) = COPY $vgpr0_vgpr1 + %assert_zext:vreg_64(s64) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext +... + +--- +name: assert_zext_sgpr_64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: assert_zext_sgpr_64 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY %copy(s64) + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:sgpr(s64) = G_ASSERT_ZEXT [[COPY]], 4 + ; CHECK-NEXT: %assert_zext:sreg_64(s64) = COPY [[ASSERT_ZEXT]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s64) + %copy:sreg_64(s64) = COPY $sgpr0_sgpr1 + %assert_zext:sreg_64(s64) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext +... + +--- +name: assert_zext_agpr_64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1 + + ; CHECK-LABEL: name: assert_zext_agpr_64 + ; CHECK: liveins: $agpr0_agpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:areg_64(s64) = COPY $agpr0_agpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY %copy(s64) + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:vgpr(s64) = G_ASSERT_ZEXT [[COPY]], 4 + ; CHECK-NEXT: %assert_zext:areg_64(s64) = COPY [[ASSERT_ZEXT]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s64) + %copy:areg_64(s64) = COPY $agpr0_agpr1 + %assert_zext:areg_64(s64) = G_ASSERT_ZEXT %copy, 4 + S_ENDPGM 0, implicit %assert_zext +... 
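The DFAJumpThreading hunk earlier in this patch swaps a per-query linear scan over the switch cases for a case-value-to-successor map that is built lazily on the first lookup and falls back to the default destination on a miss. A minimal self-contained C++ sketch of that caching pattern, using hypothetical stand-in types rather than the real APInt/BasicBlock*/SwitchInst API, might look like:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

// Hypothetical stand-ins for a switch instruction and its successors.
struct Switch {
  std::vector<std::pair<int64_t, int>> Cases; // (case value, successor id)
  int DefaultDest = -1;
};

class SuccessorCache {
  const Switch &SI;
  std::map<int64_t, int> CaseValToDest; // populated lazily on first query
public:
  explicit SuccessorCache(const Switch &S) : SI(S) {}

  // Successor for NextState, or the default destination if no case matches.
  // One O(n) build on the first call and a map lookup per query afterwards,
  // instead of an O(n) scan on every call as in the removed free function.
  int getNextCaseSuccessor(int64_t NextState) {
    if (CaseValToDest.empty())
      for (const auto &[Val, Dest] : SI.Cases)
        CaseValToDest[Val] = Dest;
    auto It = CaseValToDest.find(NextState);
    return It == CaseValToDest.end() ? SI.DefaultDest : It->second;
  }
};

The patch itself still keeps a linear-scan variant as a static helper for use during the threading step (see the TODO in that hunk), where a cached mapping could go stale as the CFG is rewritten; only the path-unification loop switches to the cached lookup.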
diff --git a/llvm/test/CodeGen/AMDGPU/fptoui_uitofp.ll b/llvm/test/CodeGen/AMDGPU/fptoui_uitofp.ll new file mode 100644 index 0000000..49204f8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fptoui_uitofp.ll @@ -0,0 +1,296 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s + +define amdgpu_kernel void @fptoui_f32_to_i16_to_f32(ptr addrspace(1) %out, float %x) { +; GFX6-LABEL: fptoui_f32_to_i16_to_f32: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_trunc_f32_e64 v0, |s6| +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f32_to_i16_to_f32: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f32_e64 v1, |s2| +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui float %x to i16 + %fp = uitofp i16 %ui to float + store float %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f32_to_i32_to_f32(ptr addrspace(1) %out, float %x) { +; GFX6-LABEL: fptoui_f32_to_i32_to_f32: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_trunc_f32_e64 v0, |s6| +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f32_to_i32_to_f32: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f32_e64 v1, |s2| +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui float %x to i32 + %fp = uitofp i32 %ui to float + store float %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f32_to_i64_to_f32(ptr addrspace(1) %out, float %x) { +; GFX6-LABEL: fptoui_f32_to_i64_to_f32: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_trunc_f32_e64 v0, |s6| +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f32_to_i64_to_f32: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f32_e64 v1, |s2| +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui float %x to i64 + %fp = uitofp i64 %ui to float + store float %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f16_to_i16_to_f16(ptr addrspace(1) %out, half %x) { +; GFX6-LABEL: fptoui_f16_to_i16_to_f16: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 
+; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, s0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f16_to_i16_to_f16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f16_e64 v1, |s2| +; GFX9-NEXT: global_store_short v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui half %x to i16 + %fp = uitofp i16 %ui to half + store half %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f16_to_i32_to_f16(ptr addrspace(1) %out, half %x) { +; GFX6-LABEL: fptoui_f16_to_i32_to_f16: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |s0| +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: v_trunc_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f16_to_i32_to_f16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f16_e64 v1, |s2| +; GFX9-NEXT: global_store_short v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui half %x to i32 + %fp = uitofp i32 %ui to half + store half %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f16_to_i64_to_f16(ptr addrspace(1) %out, half %x) { +; GFX6-LABEL: fptoui_f16_to_i64_to_f16: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |s0| +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: v_trunc_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f16_to_i64_to_f16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f16_e64 v1, |s2| +; GFX9-NEXT: global_store_short v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui half %x to i64 + %fp = uitofp i64 %ui to half + store half %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f64_to_i16_to_f64(ptr addrspace(1) %out, double %x) { +; GFX6-LABEL: fptoui_f64_to_i16_to_f64: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_cvt_u32_f64_e32 v0, s[2:3] +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f64_to_i16_to_f64: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]| +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui double %x to i16 + %fp = uitofp i16 %ui to double + store double %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f64_to_i32_to_f64(ptr addrspace(1) %out, double %x) { +; GFX6-LABEL: fptoui_f64_to_i32_to_f64: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_cvt_u32_f64_e32 v0, s[2:3] +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f64_to_i32_to_f64: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]| +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui double %x to i32 + %fp = uitofp i32 %ui to double + store double %fp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptoui_f64_to_i64_to_f64(ptr addrspace(1) %out, double %x) { +; GFX6-LABEL: fptoui_f64_to_i64_to_f64: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s5, 0xfffff +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: v_not_b32_e32 v0, 31 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_bfe_u32 s7, s3, 0xb0014 +; GFX6-NEXT: s_addk_i32 s7, 0xfc01 +; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s7 +; GFX6-NEXT: s_and_b32 s8, s3, 0x80000000 +; GFX6-NEXT: s_andn2_b64 s[4:5], s[2:3], s[4:5] +; GFX6-NEXT: s_cmp_lt_i32 s7, 0 +; GFX6-NEXT: s_cselect_b32 s4, 0, s4 +; GFX6-NEXT: s_cselect_b32 s5, s8, s5 +; GFX6-NEXT: s_cmp_gt_i32 s7, 51 +; GFX6-NEXT: s_cselect_b32 s3, s3, s5 +; GFX6-NEXT: s_cselect_b32 s2, s2, s4 +; GFX6-NEXT: v_ldexp_f64 v[0:1], s[2:3], v0 +; GFX6-NEXT: v_mov_b32_e32 v4, -1 +; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] +; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff +; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] +; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3 +; GFX6-NEXT: s_mov_b32 s4, 0 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc +; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; GFX6-NEXT: v_mov_b32_e32 v2, s2 +; GFX6-NEXT: s_mov_b32 s5, 0xc1f00000 +; GFX6-NEXT: v_mov_b32_e32 v3, s3 +; GFX6-NEXT: v_fma_f64 v[2:3], v[0:1], s[4:5], v[2:3] +; GFX6-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] +; GFX6-NEXT: v_cvt_u32_f64_e32 v2, v[2:3] +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b32 s4, s0 +; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 +; GFX6-NEXT: v_cvt_f64_u32_e32 v[2:3], v2 +; GFX6-NEXT: s_mov_b32 s5, s1 +; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32 +; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX6-NEXT: s_endpgm +; +; GFX9-LABEL: fptoui_f64_to_i64_to_f64: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]| +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm +entry: + %ui = fptoui double %x to i64 + %fp = uitofp i64 %ui to double + store double %fp, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll index 67d0410..3324018 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll @@ -3,11 +3,11 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { ; SI-LABEL: is_private_vgpr: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll index 63333ed..355d002 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll @@ -3,11 +3,11 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { ; CIT-LABEL: is_local_vgpr: diff --git a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll new file mode 100644 index 0000000..f53aaaa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll @@ -0,0 +1,625 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s + +declare i32 @llvm.ctpop.i32(i32) +declare i64 @llvm.ctpop.i64(i64) +declare i32 @llvm.amdgcn.s.quadmask.i32(i32) +declare i64 @llvm.amdgcn.s.quadmask.i64(i64) + +define amdgpu_ps i32 @shl32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: shl32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_lshl_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = shl i32 %val0, %val1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @shl64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: shl64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = shl i64 %val0, %val1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @lshr32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: lshr32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_lshr_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = lshr i32 %val0, %val1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @lshr64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: lshr64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = lshr i64 %val0, %val1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @ashr32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: ashr32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_ashr_i32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = ashr i32 %val0, %val1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @ashr64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: ashr64: +; CHECK: ; %bb.0: +; 
CHECK-NEXT: s_ashr_i64 s[0:1], s[0:1], s2 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = ashr i64 %val0, %val1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @abs32(i32 inreg %val0) { +; CHECK-LABEL: abs32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_abs_i32 s0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %neg = sub i32 0, %val0 + %cond = icmp sgt i32 %val0, %neg + %result = select i1 %cond, i32 %val0, i32 %neg + call void asm "; use $0", "s"(i32 %result) + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @and32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: and32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = and i32 %val0, %val1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @and64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: and64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = and i64 %val0, %val1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @or32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: or32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = or i32 %val0, %val1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @or64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: or64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = or i64 %val0, %val1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @xor32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: xor32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_xor_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = xor i32 %val0, %val1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @xor64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: xor64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_xor_b64 
s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = xor i64 %val0, %val1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @nand32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: nand32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_nand_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = and i32 %val0, %val1 + %result2 = xor i32 %result, -1 + call void asm "; use $0", "s"(i32 %result2) + %cmp = icmp ne i32 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @nand64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: nand64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_nand_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = and i64 %val0, %val1 + %result2 = xor i64 %result, -1 + call void asm "; use $0", "s"(i64 %result2) + %cmp = icmp ne i64 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @nor32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: nor32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_nor_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = or i32 %val0, %val1 + %result2 = xor i32 %result, -1 + call void asm "; use $0", "s"(i32 %result2) + %cmp = icmp ne i32 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @nor64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: nor64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_nor_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = or i64 %val0, %val1 + %result2 = xor i64 %result, -1 + call void asm "; use $0", "s"(i64 %result2) + %cmp = icmp ne i64 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @xnor32(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: xnor32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_xnor_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = xor i32 %val0, %val1 + %result2 = xor i32 %result, -1 + call void asm "; use $0", "s"(i32 %result2) + %cmp = icmp ne i32 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + 
+define amdgpu_ps i32 @xnor64(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: xnor64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = xor i64 %val0, %val1 + %result2 = xor i64 %result, -1 + call void asm "; use $0", "s"(i64 %result2) + %cmp = icmp ne i64 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @andn232(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: andn232: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_andn2_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %nval1 = xor i32 %val1, -1 + %result = and i32 %val0, %nval1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @nandn264(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: nandn264: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %nval1 = xor i64 %val1, -1 + %result = and i64 %val0, %nval1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @orn232(i32 inreg %val0, i32 inreg %val1) { +; CHECK-LABEL: orn232: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_orn2_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %nval1 = xor i32 %val1, -1 + %result = or i32 %val0, %nval1 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @orn264(i64 inreg %val0, i64 inreg %val1) { +; CHECK-LABEL: orn264: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_orn2_b64 s[0:1], s[0:1], s[2:3] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %nval1 = xor i64 %val1, -1 + %result = or i64 %val0, %nval1 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bfe_i32(i32 inreg %val0) { +; CHECK-LABEL: bfe_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bfe_i32 s0, s0, 0x80010 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %shl = shl i32 %val0, 8 + %result = ashr i32 %shl, 24 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bfe_i64(i64 inreg %val0) { +; CHECK-LABEL: bfe_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bfe_i64 s[2:3], s[0:1], 0x80000 +; CHECK-NEXT: s_and_b32 s0, s0, 0xff +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; 
CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[2:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ; return to shader part epilog + %shl = shl i64 %val0, 56 + %result = ashr i64 %shl, 56 + call void asm "; use $0", "s"(i64 %result) + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bfe_u32(i32 inreg %val0) { +; CHECK-LABEL: bfe_u32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bfe_u32 s0, s0, 0x80010 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %shl = shl i32 %val0, 8 + %result = lshr i32 %shl, 24 + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bfe_u64(i64 inreg %val0) { +; CHECK-LABEL: bfe_u64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, s0, 0xff +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %shl = shl i64 %val0, 56 + %result = lshr i64 %shl, 56 + call void asm "; use $0", "s"(i64 %result) + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bcnt032(i32 inreg %val0) { +; CHECK-LABEL: bcnt032: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bcnt1_i32_b32 s0, s0 +; CHECK-NEXT: s_sub_i32 s0, 32, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone + %result2 = sub i32 32, %result + call void asm "; use $0", "s"(i32 %result2) + %cmp = icmp ne i32 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bcnt064(i64 inreg %val0) { +; CHECK-LABEL: bcnt064: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; CHECK-NEXT: s_sub_u32 s0, 64, s0 +; CHECK-NEXT: s_subb_u32 s1, 0, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone + %result2 = sub i64 64, %result + call void asm "; use $0", "s"(i64 %result2) + %cmp = icmp ne i64 %result2, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @bcnt132(i32 inreg %val0) { +; CHECK-LABEL: bcnt132: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bcnt1_i32_b32 s0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone + call void asm "; use $0", "s"(i32 %result) + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 
@bcnt164(i64 inreg %val0) { +; CHECK-LABEL: bcnt164: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone + call void asm "; use $0", "s"(i64 %result) + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @quadmask32(i32 inreg %val0) { +; CHECK-LABEL: quadmask32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_quadmask_b32 s0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = call i32 @llvm.amdgcn.s.quadmask.i32(i32 %val0) nounwind readnone + call void asm "; use $0", "s"(i32 %result) + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @quadmask64(i64 inreg %val0) { +; CHECK-LABEL: quadmask64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_quadmask_b64 s[0:1], s[0:1] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %val0) nounwind readnone + call void asm "; use $0", "s"(i64 %result) + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @not32(i32 inreg %val0) { +; CHECK-LABEL: not32: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_not_b32 s0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = xor i32 %val0, -1 + call void asm "; use $0", "s"(i32 %result) + %cmp = icmp ne i32 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} + +define amdgpu_ps i32 @not64(i64 inreg %val0) { +; CHECK-LABEL: not64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_not_b64 s[0:1], s[0:1] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %result = xor i64 %val0, -1 + call void asm "; use $0", "s"(i64 %result) + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i32 + ret i32 %zext +} diff --git a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir index 8b467eb..75ae76f 100644 --- a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir +++ b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir @@ -1,7 +1,7 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# NOTE: 
Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GFX950 %s # RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GFX942 %s -# RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GFX90a %s +# RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GFX90A %s --- name: test_pk_mul_unpacking_f32 @@ -57,26 +57,26 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_MUL_F32_e64 0, killed $sgpr31, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_pk_mul_unpacking_f32 - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_pk_mul_unpacking_f32 + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable 
$sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec S_WAITCNT 49279 @@ -150,26 +150,26 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_MUL_F32_e64 0, killed $sgpr31, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_op_sel_selection_unpacking_f32 - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit 
$exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_op_sel_selection_unpacking_f32 + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec S_WAITCNT 49279 @@ -243,26 +243,26 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_MUL_F32_e64 0, killed $sgpr30, 0, 
killed $vgpr5, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_op_sel_hi_selection_unpacking_f32 - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 0, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_op_sel_hi_selection_unpacking_f32 + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit 
$sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 0, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec S_WAITCNT 49279 @@ -370,41 +370,41 @@ body: | ; GFX942-NEXT: renamable $vgpr10_vgpr11 = nofpexcept V_PK_ADD_F32 8, killed $sgpr10_sgpr11, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_pk_add_unpacking_f32 - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr4 = V_MOV_B32_e32 2, implicit $exec - ; GFX90a-NEXT: renamable $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; GFX90a-NEXT: renamable $vgpr2 = V_MOV_B32_e32 4, implicit $exec - ; GFX90a-NEXT: renamable $vgpr3 = V_MOV_B32_e32 3, implicit $exec - ; GFX90a-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: renamable $vgpr16 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90a-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec - ; GFX90a-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr18, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr19, implicit $exec, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_BF8_BF8_e64 killed $vgpr4_vgpr5, killed $vgpr2_vgpr3, killed $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 1, 2, 3, implicit $mode, implicit $exec, implicit $mode, implicit $exec - ; GFX90a-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, killed $sgpr2_sgpr3, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: renamable $vgpr6_vgpr7 = nofpexcept V_PK_ADD_F32 8, killed $sgpr6_sgpr7, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_ADD_F32 8, killed $sgpr4_sgpr5, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: renamable $vgpr10_vgpr11 = nofpexcept V_PK_ADD_F32 8, killed $sgpr10_sgpr11, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_pk_add_unpacking_f32 + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber 
renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr4 = V_MOV_B32_e32 2, implicit $exec + ; GFX90A-NEXT: renamable $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; GFX90A-NEXT: renamable $vgpr2 = V_MOV_B32_e32 4, implicit $exec + ; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 3, implicit $exec + ; GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: renamable $vgpr16 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec + ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr18, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr19, implicit $exec, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_BF8_BF8_e64 killed $vgpr4_vgpr5, killed $vgpr2_vgpr3, killed $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 1, 2, 3, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, killed $sgpr2_sgpr3, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = nofpexcept V_PK_ADD_F32 8, killed $sgpr6_sgpr7, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_ADD_F32 8, killed $sgpr4_sgpr5, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = nofpexcept V_PK_ADD_F32 8, killed $sgpr10_sgpr11, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr4 = V_MOV_B32_e32 2, implicit $exec renamable $vgpr5 = V_MOV_B32_e32 1, implicit $exec @@ -438,7 +438,6 @@ body: | renamable $vgpr10_vgpr11 = nofpexcept V_PK_ADD_F32 8, killed $sgpr10_sgpr11, 8, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec S_ENDPGM 0 - ... 
--- name: test_pk_fma_unpacking_f32 @@ -490,24 +489,24 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_FMA_F32_e64 0, killed $sgpr30, 0, killed $vgpr5, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_pk_fma_unpacking_f32 - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 8, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_pk_fma_unpacking_f32 + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, 
implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 8, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 @@ -577,25 +576,25 @@ body: | ; GFX942-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_FMA_F32 8, killed $sgpr30_sgpr31, 8, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_unpacking_does_not_introduce_rw_dependency - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = 
V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_MUL_F32 8, $sgpr30_sgpr31, 12, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_FMA_F32 8, killed $sgpr30_sgpr31, 8, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_unpacking_does_not_introduce_rw_dependency + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_MUL_F32 8, $sgpr30_sgpr31, 12, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nofpexcept V_PK_FMA_F32 8, killed $sgpr30_sgpr31, 8, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 @@ -670,28 +669,28 @@ body: | ; GFX942-NEXT: $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_opcodes_not_supported_for_unpacking_are_skipped - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable 
$sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_opcodes_not_supported_for_unpacking_are_skipped + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, 
implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 @@ -705,14 +704,11 @@ body: | early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - $vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec $vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec - $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM 0 ... 
@@ -770,26 +766,26 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_FMA_F32_e64 0, killed $sgpr30, 0, killed $vgpr5, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_opsel_register_is_correctly_marked_as_killed - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr6 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr7 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 8, killed $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_opsel_register_is_correctly_marked_as_killed + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable 
$sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr6 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr7 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 8, killed $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 @@ -861,26 +857,26 @@ body: | ; GFX942-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 8, killed $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_inst_dependent_on_mfma_are_not_unpacked - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit 
$sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec - ; GFX90a-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec - ; GFX90a-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 8, killed $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_inst_dependent_on_mfma_are_not_unpacked + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 0, killed $sgpr30_sgpr31, 12, killed $vgpr4_vgpr5, 8, killed $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec renamable 
$sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 @@ -950,25 +946,25 @@ body: | ; GFX942-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 8, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_mfma_def_using_instr_blocks_unpacking - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 8, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_mfma_def_using_instr_blocks_unpacking + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable 
$sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_FMA_F32 8, killed $sgpr30_sgpr31, 8, killed $vgpr4_vgpr5, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 @@ -1041,26 +1037,26 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_unpacking_with_imm_input - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit 
$sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, 1065353216, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_unpacking_with_imm_input + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, 1065353216, 8, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec S_WAITCNT 49279 @@ -1134,26 +1130,26 @@ body: | ; GFX942-NEXT: $vgpr17 = nofpexcept V_MUL_F32_e64 0, killed $sgpr31, 1, killed $vgpr5, 0, 0, implicit $mode, implicit $exec ; GFX942-NEXT: S_ENDPGM 0 ; - ; GFX90a-LABEL: name: test_neg_lo_hi_post_unpacking - ; GFX90a: liveins: $sgpr4_sgpr5 - ; GFX90a-NEXT: {{ $}} - ; GFX90a-NEXT: early-clobber renamable 
$sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 - ; GFX90a-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 - ; GFX90a-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 - ; GFX90a-NEXT: S_WAITCNT 49279 - ; GFX90a-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 - ; GFX90a-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec - ; GFX90a-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX90a-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec - ; GFX90a-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec - ; GFX90a-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec - ; GFX90a-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 11, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX90a-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_neg_lo_hi_post_unpacking + ; GFX90A: liveins: $sgpr4_sgpr5 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 + ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0 + ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0 + ; GFX90A-NEXT: S_WAITCNT 49279 + ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47 + ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec + ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, 
implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec + ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 11, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0 early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0 renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec S_WAITCNT 49279 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll new file mode 100644 index 0000000..a2d6ca9 --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll @@ -0,0 +1,39 @@ +;; Test if a potential indirect call target function which has internal linkage and +;; address taken has its type ID emitted to callgraph section. +;; This test also makes sure that callback functions which meet the above constraint +;; are handled correctly. + +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s + +declare !type !0 void @_Z6doWorkPFviE(ptr) + +define i32 @_Z4testv() !type !1 { +entry: + call void @_Z6doWorkPFviE(ptr nonnull @_ZL10myCallbacki) + ret i32 0 +} + +; CHECK: _ZL10myCallbacki: +; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: +define internal void @_ZL10myCallbacki(i32 %value) !type !2 { +entry: + %sink = alloca i32, align 4 + store volatile i32 %value, ptr %sink, align 4 + %i1 = load volatile i32, ptr %sink, align 4 + ret void +} + +!0 = !{i64 0, !"_ZTSFvPFviEE.generalized"} +!1 = !{i64 0, !"_ZTSFivE.generalized"} +!2 = !{i64 0, !"_ZTSFviE.generalized"} + +; CHECK: .section .callgraph,"o",%progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. +; CHECK-NEXT: .byte 1 +;; Function Entry PC +; CHECK-NEXT: .long [[LABEL_FUNC]] +;; Function type ID -5212364466660467813 +; CHECK-NEXT: .long 1154849691 +; CHECK-NEXT: .long 3081369122 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll new file mode 100644 index 0000000..bf5249e --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll @@ -0,0 +1,63 @@ +;; Test if temporary labels are generated for each indirect callsite. +;; Test if the .callgraph section contains the MD5 hash of callees' type (type id) +;; is correctly paired with its corresponding temporary label generated for indirect +;; call sites annotated with !callee_type metadata. +;; Test if the .callgraph section contains unique direct callees. 
+ +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s + +declare !type !0 void @direct_foo() +declare !type !1 i32 @direct_bar(i8) +declare !type !2 ptr @direct_baz(ptr) + +; CHECK: ball: +; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: +define ptr @ball() { +entry: + call void @direct_foo() + %fp_foo_val = load ptr, ptr null, align 8 + call void (...) %fp_foo_val(), !callee_type !0 + call void @direct_foo() + %fp_bar_val = load ptr, ptr null, align 8 + %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2 + %call_fp_bar_direct = call i32 @direct_bar(i8 1) + %fp_baz_val = load ptr, ptr null, align 8 + %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 + call void @direct_foo() + %call_fp_baz_direct = call ptr @direct_baz(ptr null) + call void @direct_foo() + ret ptr %call_fp_baz +} + +!0 = !{!1} +!1 = !{i64 0, !"_ZTSFvE.generalized"} +!2 = !{!3} +!3 = !{i64 0, !"_ZTSFicE.generalized"} +!4 = !{!5} +!5 = !{i64 0, !"_ZTSFPvS_E.generalized"} + +; CHECK: .section .callgraph,"o",%progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags +; CHECK-NEXT: .byte 7 +;; Function Entry PC +; CHECK-NEXT: .long [[LABEL_FUNC]] +;; Function type ID -- set to 0 as no type metadata attached to function. +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 0 +;; Number of unique direct callees. +; CHECK-NEXT: .byte 3 +;; Direct callees. +; CHECK-NEXT: .long direct_foo +; CHECK-NEXT: .long direct_bar +; CHECK-NEXT: .long direct_baz +;; Number of unique indirect target type IDs. +; CHECK-NEXT: .byte 3 +;; Indirect type IDs. +; CHECK-NEXT: .long 838288420 +; CHECK-NEXT: .long 1053552373 +; CHECK-NEXT: .long 1505527380 +; CHECK-NEXT: .long 814631809 +; CHECK-NEXT: .long 342417018 +; CHECK-NEXT: .long 2013108216 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll new file mode 100644 index 0000000..d577603 --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll @@ -0,0 +1,34 @@ +;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls. + +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ +; RUN: llvm-readelf -x .callgraph - | FileCheck %s + +define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { +entry: + %call = tail call i32 %func(i8 signext %x), !callee_type !1 + ret i32 %call +} + +define i32 @main(i32 %argc) !type !3 { +entry: + %andop = and i32 %argc, 1 + %cmp = icmp eq i32 %andop, 0 + %foo.bar = select i1 %cmp, ptr @foo, ptr @bar + %call.i = tail call i32 %foo.bar(i8 signext 97), !callee_type !1 + ret i32 %call.i +} + +declare !type !2 i32 @foo(i8 signext) + +declare !type !2 i32 @bar(i8 signext) + +!0 = !{i64 0, !"_ZTSFiPvcE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFicE.generalized"} +!3 = !{i64 0, !"_ZTSFiiE.generalized"} + +; CHECK: Hex dump of section '.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154 +; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8 +;; Verify that the type id 0x308e4b8159bc8654 is in section. +; CHECK-NEXT: 0x00000020 3e0cfe3c b2015486 bc59814b 8e30 diff --git a/llvm/test/CodeGen/ARM/call-graph-section.ll b/llvm/test/CodeGen/ARM/call-graph-section.ll new file mode 100644 index 0000000..928a1067 --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section.ll @@ -0,0 +1,37 @@ +;; Tests that we store the type identifiers in .callgraph section of the object file. 
+ +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ +; RUN: llvm-readelf -x .callgraph - | FileCheck %s + +declare !type !0 void @foo() + +declare !type !1 i32 @bar(i8) + +declare !type !2 ptr @baz(ptr) + +define void @main() { +entry: + %fp_foo_val = load ptr, ptr null, align 8 + call void (...) %fp_foo_val(), !callee_type !1 + %fp_bar_val = load ptr, ptr null, align 8 + %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !3 + %fp_baz_val = load ptr, ptr null, align 8 + %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 + ret void +} + +;; Check that the numeric type id (md5 hash) for the below type ids are emitted +;; to the callgraph section. +!0 = !{i64 0, !"_ZTSFvE.generalized"} +!1 = !{!0} +!2 = !{i64 0, !"_ZTSFicE.generalized"} +!3 = !{!2} +!4 = !{!5} +!5 = !{i64 0, !"_ZTSFPvS_E.generalized"} + +;; Make sure following type IDs are in call graph section +;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814 +; CHECK: Hex dump of section '.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000324 +; CHECK-NEXT: 0x00000010 44f731f5 eecb3e54 86bc5981 4b8e307a +; CHECK-NEXT: 0x00000020 de6814f8 97fd77 diff --git a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll new file mode 100644 index 0000000..2aea9c1 --- /dev/null +++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll @@ -0,0 +1,38 @@ +;; Test if a potential indirect call target function which has internal linkage and +;; address taken has its type ID emitted to callgraph section. +;; This test also makes sure that callback functions which meet the above constraint +;; are handled correctly. + +; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s + +declare !type !0 void @_Z6doWorkPFviE(ptr) + +define i32 @_Z4testv() !type !1 { +entry: + call void @_Z6doWorkPFviE(ptr nonnull @_ZL10myCallbacki) + ret i32 0 +} + +; CHECK: _ZL10myCallbacki: +; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: +define internal void @_ZL10myCallbacki(i32 %value) !type !2 { +entry: + %sink = alloca i32, align 4 + store volatile i32 %value, ptr %sink, align 4 + %i1 = load volatile i32, ptr %sink, align 4 + ret void +} + +!0 = !{i64 0, !"_ZTSFvPFviEE.generalized"} +!1 = !{i64 0, !"_ZTSFivE.generalized"} +!2 = !{i64 0, !"_ZTSFviE.generalized"} + +; CHECK: .section .callgraph,"o",@progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. +; CHECK-NEXT: .byte 1 +;; Function Entry PC +; CHECK-NEXT: .quad [[LABEL_FUNC]] +;; Function type ID +; CHECK-NEXT: .quad -5212364466660467813 diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll index f0dbc31..1aabf66 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll @@ -15,16 +15,13 @@ declare !type !2 ptr @direct_baz(ptr) define ptr @ball() { entry: call void @direct_foo() - %fp_foo_val = load ptr, ptr null, align 8 - ; CHECK: [[LABEL_TMP0:\.L.*]]: + %fp_foo_val = load ptr, ptr null, align 8 call void (...) 
%fp_foo_val(), !callee_type !0 call void @direct_foo() - %fp_bar_val = load ptr, ptr null, align 8 - ; CHECK: [[LABEL_TMP1:\.L.*]]: + %fp_bar_val = load ptr, ptr null, align 8 %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2 %call_fp_bar_direct = call i32 @direct_bar(i8 1) %fp_baz_val = load ptr, ptr null, align 8 - ; CHECK: [[LABEL_TMP2:\.L.*]]: %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 call void @direct_foo() %call_fp_baz_direct = call ptr @direct_baz(ptr null) @@ -32,29 +29,31 @@ entry: ret ptr %call_fp_baz } -; CHECK: .section .callgraph,"o",@progbits,.text - -; CHECK-NEXT: .quad 0 -; CHECK-NEXT: .quad [[LABEL_FUNC]] -; CHECK-NEXT: .quad 1 -; CHECK-NEXT: .quad 3 !0 = !{!1} !1 = !{i64 0, !"_ZTSFvE.generalized"} -;; Test for MD5 hash of _ZTSFvE.generalized and the generated temporary callsite label. -; CHECK-NEXT: .quad 4524972987496481828 -; CHECK-NEXT: .quad [[LABEL_TMP0]] !2 = !{!3} !3 = !{i64 0, !"_ZTSFicE.generalized"} -;; Test for MD5 hash of _ZTSFicE.generalized and the generated temporary callsite label. -; CHECK-NEXT: .quad 3498816979441845844 -; CHECK-NEXT: .quad [[LABEL_TMP1]] !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} -;; Test for MD5 hash of _ZTSFPvS_E.generalized and the generated temporary callsite label. -; CHECK-NEXT: .quad 8646233951371320954 -; CHECK-NEXT: .quad [[LABEL_TMP2]] -;; Test for number of direct calls and {callsite_label, callee} pairs. -; CHECK-NEXT: .quad 3 + +; CHECK: .section .callgraph,"o",@progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags +; CHECK-NEXT: .byte 7 +;; Function Entry PC +; CHECK-NEXT: .quad [[LABEL_FUNC]] +;; Function type ID -- set to 0 as no type metadata attached to function. +; CHECK-NEXT: .quad 0 +;; Number of unique direct callees. +; CHECK-NEXT: .byte 3 +;; Direct callees. ; CHECK-NEXT: .quad direct_foo ; CHECK-NEXT: .quad direct_bar ; CHECK-NEXT: .quad direct_baz +;; Number of unique indirect target type IDs. +; CHECK-NEXT: .byte 3 +;; Indirect type IDs. +; CHECK-NEXT: .quad 4524972987496481828 +; CHECK-NEXT: .quad 3498816979441845844 +; CHECK-NEXT: .quad 8646233951371320954 diff --git a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll index fa14a98..34dc5b8 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll @@ -22,13 +22,14 @@ declare !type !2 i32 @foo(i8 signext) declare !type !2 i32 @bar(i8 signext) -;; Check that the numeric type id (md5 hash) for the below type ids are emitted -;; to the callgraph section. - -; CHECK: Hex dump of section '.callgraph': - !0 = !{i64 0, !"_ZTSFiPvcE.generalized"} !1 = !{!2} -; CHECK-DAG: 5486bc59 814b8e30 !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{i64 0, !"_ZTSFiiE.generalized"} + +; CHECK: Hex dump of section '.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00000000 00008e19 0b7f3326 +; CHECK-NEXT: 0x00000010 e3000154 86bc5981 4b8e3000 05000000 +;; Verify that the type id 0x308e4b8159bc8654 is in section. +; CHECK-NEXT: 0x00000020 00000000 00a150b8 3e0cfe3c b2015486 +; CHECK-NEXT: 0x00000030 bc59814b 8e30 diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll index 66d009c..c144a24 100644 --- a/llvm/test/CodeGen/X86/call-graph-section.ll +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -22,15 +22,16 @@ entry: ;; Check that the numeric type id (md5 hash) for the below type ids are emitted ;; to the callgraph section. 
- -; CHECK: Hex dump of section '.callgraph': - -; CHECK-DAG: 2444f731 f5eecb3e !0 = !{i64 0, !"_ZTSFvE.generalized"} !1 = !{!0} -; CHECK-DAG: 5486bc59 814b8e30 !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{!2} -; CHECK-DAG: 7ade6814 f897fd77 !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} + +;; Make sure following type IDs are in call graph section +;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814 +; CHECK: Hex dump of section '.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000000 +; CHECK-NEXT: 0x00000010 00000324 44f731f5 eecb3e54 86bc5981 +; CHECK-NEXT: 0x00000020 4b8e307a de6814f8 97fd77 diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll new file mode 100644 index 0000000..a0c243b --- /dev/null +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -0,0 +1,43 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;; A minimal test case. llc will crash if global variables already has a section +;; prefix. Subsequent PRs will expand on this test case to test the hotness +;; reconciliation implementation. + +; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +; RUN: -partition-static-data-sections=true \ +; RUN: -data-sections=true -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR + +; ERR: Global variable hot_bss already has a section prefix hot + +@hot_bss = internal global i32 0, !section_prefix !17 + +define void @hot_func() !prof !14 { + %9 = load i32, ptr @hot_bss + %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9) + ret void +} + +declare i32 @func_taking_arbitrary_param(...) 
+ +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999} +!17 = !{!"section_prefix", !"hot"} diff --git a/llvm/test/CodeGen/X86/pr162812.ll b/llvm/test/CodeGen/X86/pr162812.ll new file mode 100644 index 0000000..4ea3101 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr162812.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE42 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 + +define <32 x i8> @PR162812(<32 x i8> %a, <32 x i8> %mask) { +; SSE2-LABEL: PR162812: +; SSE2: # %bb.0: +; SSE2-NEXT: psrlw $2, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8224,8224,8224,8224,8224,8224,8224,8224] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm5, %xmm5 +; SSE2-NEXT: pxor %xmm6, %xmm6 +; SSE2-NEXT: pcmpgtb %xmm2, %xmm6 +; SSE2-NEXT: movdqa %xmm6, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: paddb %xmm0, %xmm0 +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: psrlw $2, %xmm3 +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: paddb %xmm3, %xmm3 +; SSE2-NEXT: paddb %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm3, %xmm5 +; SSE2-NEXT: movdqa %xmm5, %xmm2 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: paddb %xmm1, %xmm1 +; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: PR162812: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa %xmm2, %xmm5 +; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: movdqa %xmm0, %xmm6 +; SSE42-NEXT: psllw $2, %xmm6 +; SSE42-NEXT: movdqa {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] +; SSE42-NEXT: pand %xmm7, %xmm6 +; SSE42-NEXT: psrlw $2, %xmm5 +; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [8224,8224,8224,8224,8224,8224,8224,8224] +; SSE42-NEXT: pand %xmm4, %xmm5 +; SSE42-NEXT: paddb %xmm5, %xmm5 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2 +; SSE42-NEXT: movdqa %xmm2, %xmm6 +; SSE42-NEXT: paddb %xmm2, %xmm6 +; SSE42-NEXT: paddb %xmm5, %xmm5 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2 +; SSE42-NEXT: movdqa %xmm1, %xmm5 +; SSE42-NEXT: psllw $2, %xmm5 +; SSE42-NEXT: pand %xmm7, %xmm5 +; SSE42-NEXT: psrlw $2, %xmm3 +; SSE42-NEXT: pand %xmm3, %xmm4 +; SSE42-NEXT: paddb %xmm4, %xmm4 +; SSE42-NEXT: movdqa %xmm4, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm5, %xmm1 +; SSE42-NEXT: movdqa %xmm1, %xmm3 +; SSE42-NEXT: paddb %xmm1, %xmm3 +; SSE42-NEXT: paddb %xmm4, %xmm4 +; SSE42-NEXT: movdqa %xmm4, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1 +; SSE42-NEXT: movdqa %xmm2, %xmm0 +; SSE42-NEXT: retq +; +; AVX2-LABEL: PR162812: +; AVX2: # 
%bb.0: +; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm1 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: PR162812: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 +; AVX512-NEXT: vpsrlw $2, %ymm1, %ymm1 +; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1 +; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: retq + %1 = lshr <32 x i8> %mask, splat (i8 7) + %ret = shl <32 x i8> %a, %1 + ret <32 x i8> %ret +} diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll new file mode 100644 index 0000000..211a7bc --- /dev/null +++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll @@ -0,0 +1,17 @@ +; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not "DW_AT_language" + +; CHECK: DW_AT_language_name (DW_LNAME_ObjC_plus_plus) + +@x = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!6, !7} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !3, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +!3 = !DIFile(filename: "cu.cpp", directory: "/tmp") +!4 = !{!0} +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!6 = !{i32 7, !"Dwarf Version", i32 5} +!7 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language.ll new file mode 100644 index 0000000..bafe620 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/compileunit-source-language.ll @@ -0,0 +1,17 @@ +; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not "DW_AT_language_name" + +; CHECK: DW_AT_language (DW_LANG_C) + +@x = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!6, !7} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +!3 = !DIFile(filename: "cu.cpp", directory: "/tmp") +!4 = !{!0} +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!6 = !{i32 7, !"Dwarf Version", i32 5} +!7 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/Transforms/GVN/PRE/no-pre-load-for-token-like.ll 
b/llvm/test/Transforms/GVN/PRE/no-pre-load-for-token-like.ll new file mode 100644 index 0000000..1b36aba --- /dev/null +++ b/llvm/test/Transforms/GVN/PRE/no-pre-load-for-token-like.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=gvn %s | FileCheck %s + +; NOTE: A test to confirm GVN doesn't collapse loads of token-like types into +; NOTE: phi nodes. + +define ptr @main() { +; CHECK-LABEL: define ptr @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[ENTRY_IF_END_I_CRIT_EDGE:.*]], label %[[IF_THEN_I:.*]] +; CHECK: [[ENTRY_IF_END_I_CRIT_EDGE]]: +; CHECK-NEXT: br label %[[IF_END_I:.*]] +; CHECK: [[IF_THEN_I]]: +; CHECK-NEXT: [[TMP0:%.*]] = load target("dx.RawBuffer", half, 1, 0), ptr null, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP0]], i32 0) +; CHECK-NEXT: br label %[[IF_END_I]] +; CHECK: [[IF_END_I]]: +; CHECK-NEXT: [[TMP2:%.*]] = load target("dx.RawBuffer", half, 1, 0), ptr null, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = tail call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 0) +; CHECK-NEXT: ret ptr [[TMP3]] +; +entry: + br i1 false, label %if.end.i, label %if.then.i + +if.then.i: ; preds = %entry + %0 = load target("dx.RawBuffer", half, 1, 0), ptr null, align 4 + %1 = tail call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %0, i32 0) + br label %if.end.i + +if.end.i: ; preds = %if.then.i, %entry + %2 = load target("dx.RawBuffer", half, 1, 0), ptr null, align 4 + %3 = tail call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %2, i32 0) + ret ptr %3 +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll new file mode 100644 index 0000000..c1042f18 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem \ +; RUN: -passes=slp-vectorizer -S < %s | FileCheck %s +; Function Attrs: nounwind uwtable vscale_range(8,1024) +define i32 @x264_pixel_satd_8x4(ptr %pix1, i32 %i_pix1, ptr %pix2, i32 %i_pix2) { +; CHECK-LABEL: define i32 @x264_pixel_satd_8x4( +; CHECK-SAME: ptr [[PIX1:%.*]], i32 [[I_PIX1:%.*]], ptr [[PIX2:%.*]], i32 [[I_PIX2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[I_PIX1]] to i64 +; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[I_PIX2]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[PIX1]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[PIX2]], i64 4 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[PIX1]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4 +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] +; CHECK-NEXT: 
[[ARRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_1]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_1]], i64 4 +; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_2]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_2]], i64 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP17]], <16 x i8> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP20:%.*]] = zext <16 x i8> [[TMP19]] to <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP29:%.*]] = zext <16 x i8> [[TMP28]] to <16 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i32> [[TMP20]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP31]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 
11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP40]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP39]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = shl nsw <16 x i32> [[TMP49]], splat (i32 16) +; CHECK-NEXT: [[TMP51:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP30]] +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-NEXT: [[TMP53:%.*]] = add nsw <16 x i32> [[TMP52]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = sub nsw <16 x i32> [[TMP52]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13> +; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 20, i32 21, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 29, i32 14, i32 15> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 
12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP61:%.*]] = sub nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP63]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP65:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = sub nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP66]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP68:%.*]] = lshr <16 x i32> [[TMP67]], splat (i32 15) +; CHECK-NEXT: [[TMP69:%.*]] = and <16 x i32> [[TMP68]], splat (i32 65537) +; CHECK-NEXT: [[TMP70:%.*]] = mul nuw <16 x i32> [[TMP69]], splat (i32 65535) +; CHECK-NEXT: [[TMP71:%.*]] = add <16 x i32> [[TMP70]], [[TMP67]] +; CHECK-NEXT: [[TMP72:%.*]] = xor <16 x i32> [[TMP71]], [[TMP70]] +; CHECK-NEXT: [[TMP73:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP72]]) +; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP73]], 65535 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP73]], 16 +; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]] +; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 +; CHECK-NEXT: ret i32 [[SHR120]] +; +entry: + %idx.ext = sext i32 %i_pix1 to i64 + %idx.ext63 = sext i32 %i_pix2 to i64 + %0 = load i8, ptr %pix1, align 1 + %conv = zext i8 %0 to i32 + %1 = load i8, ptr %pix2, align 1 + %conv2 = zext i8 %1 to i32 + %sub = sub nsw i32 %conv, %conv2 + %arrayidx3 = getelementptr inbounds nuw i8, ptr %pix1, i64 4 + %2 = load i8, ptr %arrayidx3, align 1 + %conv4 = zext i8 %2 to i32 + %arrayidx5 = getelementptr inbounds nuw i8, ptr %pix2, i64 4 + %3 = load i8, ptr %arrayidx5, align 1 + %conv6 = zext i8 %3 to i32 + %sub7 = sub nsw i32 %conv4, %conv6 + %shl = shl nsw i32 %sub7, 16 + %add = add nsw i32 %shl, %sub + %arrayidx8 = getelementptr inbounds nuw i8, ptr %pix1, i64 1 + %4 = load i8, ptr %arrayidx8, align 1 + %conv9 = zext i8 %4 to i32 + %arrayidx10 = getelementptr inbounds nuw i8, ptr %pix2, i64 1 + %5 = load i8, ptr %arrayidx10, align 1 + %conv11 = zext i8 %5 to i32 + %sub12 = sub nsw i32 %conv9, %conv11 + %arrayidx13 = getelementptr inbounds nuw i8, ptr %pix1, i64 5 + %6 = load i8, ptr %arrayidx13, align 1 + %conv14 = zext i8 %6 to i32 + %arrayidx15 = getelementptr inbounds nuw i8, ptr %pix2, i64 5 + %7 = load i8, ptr %arrayidx15, align 1 + %conv16 = zext i8 %7 to i32 + %sub17 = sub nsw i32 %conv14, %conv16 + %shl18 = shl nsw i32 %sub17, 16 + %add19 = add nsw i32 %shl18, %sub12 + %arrayidx20 = getelementptr inbounds nuw i8, ptr %pix1, i64 2 + %8 = load i8, ptr %arrayidx20, align 1 + %conv21 = zext i8 %8 to i32 + %arrayidx22 = getelementptr inbounds nuw i8, ptr %pix2, i64 2 + %9 = load i8, ptr %arrayidx22, align 1 + %conv23 = zext i8 %9 to i32 + %sub24 = sub nsw i32 %conv21, %conv23 + %arrayidx25 = getelementptr inbounds nuw i8, ptr %pix1, i64 6 + %10 = load i8, ptr %arrayidx25, align 1 + %conv26 = zext i8 %10 to i32 + %arrayidx27 = getelementptr inbounds nuw i8, ptr %pix2, i64 6 + %11 = load 
i8, ptr %arrayidx27, align 1 + %conv28 = zext i8 %11 to i32 + %sub29 = sub nsw i32 %conv26, %conv28 + %shl30 = shl nsw i32 %sub29, 16 + %add31 = add nsw i32 %shl30, %sub24 + %arrayidx32 = getelementptr inbounds nuw i8, ptr %pix1, i64 3 + %12 = load i8, ptr %arrayidx32, align 1 + %conv33 = zext i8 %12 to i32 + %arrayidx34 = getelementptr inbounds nuw i8, ptr %pix2, i64 3 + %13 = load i8, ptr %arrayidx34, align 1 + %conv35 = zext i8 %13 to i32 + %sub36 = sub nsw i32 %conv33, %conv35 + %arrayidx37 = getelementptr inbounds nuw i8, ptr %pix1, i64 7 + %14 = load i8, ptr %arrayidx37, align 1 + %conv38 = zext i8 %14 to i32 + %arrayidx39 = getelementptr inbounds nuw i8, ptr %pix2, i64 7 + %15 = load i8, ptr %arrayidx39, align 1 + %conv40 = zext i8 %15 to i32 + %sub41 = sub nsw i32 %conv38, %conv40 + %shl42 = shl nsw i32 %sub41, 16 + %add43 = add nsw i32 %shl42, %sub36 + %add44 = add nsw i32 %add19, %add + %sub45 = sub nsw i32 %add, %add19 + %add46 = add nsw i32 %add43, %add31 + %sub47 = sub nsw i32 %add31, %add43 + %add48 = add nsw i32 %add46, %add44 + %sub51 = sub nsw i32 %add44, %add46 + %add55 = add nsw i32 %sub47, %sub45 + %sub59 = sub nsw i32 %sub45, %sub47 + %add.ptr = getelementptr inbounds i8, ptr %pix1, i64 %idx.ext + %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63 + %16 = load i8, ptr %add.ptr, align 1 + %conv.1 = zext i8 %16 to i32 + %17 = load i8, ptr %add.ptr64, align 1 + %conv2.1 = zext i8 %17 to i32 + %sub.1 = sub nsw i32 %conv.1, %conv2.1 + %arrayidx3.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 4 + %18 = load i8, ptr %arrayidx3.1, align 1 + %conv4.1 = zext i8 %18 to i32 + %arrayidx5.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 4 + %19 = load i8, ptr %arrayidx5.1, align 1 + %conv6.1 = zext i8 %19 to i32 + %sub7.1 = sub nsw i32 %conv4.1, %conv6.1 + %shl.1 = shl nsw i32 %sub7.1, 16 + %add.1 = add nsw i32 %shl.1, %sub.1 + %arrayidx8.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 1 + %20 = load i8, ptr %arrayidx8.1, align 1 + %conv9.1 = zext i8 %20 to i32 + %arrayidx10.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 1 + %21 = load i8, ptr %arrayidx10.1, align 1 + %conv11.1 = zext i8 %21 to i32 + %sub12.1 = sub nsw i32 %conv9.1, %conv11.1 + %arrayidx13.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 5 + %22 = load i8, ptr %arrayidx13.1, align 1 + %conv14.1 = zext i8 %22 to i32 + %arrayidx15.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 5 + %23 = load i8, ptr %arrayidx15.1, align 1 + %conv16.1 = zext i8 %23 to i32 + %sub17.1 = sub nsw i32 %conv14.1, %conv16.1 + %shl18.1 = shl nsw i32 %sub17.1, 16 + %add19.1 = add nsw i32 %shl18.1, %sub12.1 + %arrayidx20.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 2 + %24 = load i8, ptr %arrayidx20.1, align 1 + %conv21.1 = zext i8 %24 to i32 + %arrayidx22.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 2 + %25 = load i8, ptr %arrayidx22.1, align 1 + %conv23.1 = zext i8 %25 to i32 + %sub24.1 = sub nsw i32 %conv21.1, %conv23.1 + %arrayidx25.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 6 + %26 = load i8, ptr %arrayidx25.1, align 1 + %conv26.1 = zext i8 %26 to i32 + %arrayidx27.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 6 + %27 = load i8, ptr %arrayidx27.1, align 1 + %conv28.1 = zext i8 %27 to i32 + %sub29.1 = sub nsw i32 %conv26.1, %conv28.1 + %shl30.1 = shl nsw i32 %sub29.1, 16 + %add31.1 = add nsw i32 %shl30.1, %sub24.1 + %arrayidx32.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 3 + %28 = load i8, ptr %arrayidx32.1, align 1 + %conv33.1 = zext i8 %28 to 
i32 + %arrayidx34.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 3 + %29 = load i8, ptr %arrayidx34.1, align 1 + %conv35.1 = zext i8 %29 to i32 + %sub36.1 = sub nsw i32 %conv33.1, %conv35.1 + %arrayidx37.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 7 + %30 = load i8, ptr %arrayidx37.1, align 1 + %conv38.1 = zext i8 %30 to i32 + %arrayidx39.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 7 + %31 = load i8, ptr %arrayidx39.1, align 1 + %conv40.1 = zext i8 %31 to i32 + %sub41.1 = sub nsw i32 %conv38.1, %conv40.1 + %shl42.1 = shl nsw i32 %sub41.1, 16 + %add43.1 = add nsw i32 %shl42.1, %sub36.1 + %add44.1 = add nsw i32 %add19.1, %add.1 + %sub45.1 = sub nsw i32 %add.1, %add19.1 + %add46.1 = add nsw i32 %add43.1, %add31.1 + %sub47.1 = sub nsw i32 %add31.1, %add43.1 + %add48.1 = add nsw i32 %add46.1, %add44.1 + %sub51.1 = sub nsw i32 %add44.1, %add46.1 + %add55.1 = add nsw i32 %sub47.1, %sub45.1 + %sub59.1 = sub nsw i32 %sub45.1, %sub47.1 + %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext + %add.ptr64.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 %idx.ext63 + %32 = load i8, ptr %add.ptr.1, align 1 + %conv.2 = zext i8 %32 to i32 + %33 = load i8, ptr %add.ptr64.1, align 1 + %conv2.2 = zext i8 %33 to i32 + %sub.2 = sub nsw i32 %conv.2, %conv2.2 + %arrayidx3.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 4 + %34 = load i8, ptr %arrayidx3.2, align 1 + %conv4.2 = zext i8 %34 to i32 + %arrayidx5.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 4 + %35 = load i8, ptr %arrayidx5.2, align 1 + %conv6.2 = zext i8 %35 to i32 + %sub7.2 = sub nsw i32 %conv4.2, %conv6.2 + %shl.2 = shl nsw i32 %sub7.2, 16 + %add.2 = add nsw i32 %shl.2, %sub.2 + %arrayidx8.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 1 + %36 = load i8, ptr %arrayidx8.2, align 1 + %conv9.2 = zext i8 %36 to i32 + %arrayidx10.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 1 + %37 = load i8, ptr %arrayidx10.2, align 1 + %conv11.2 = zext i8 %37 to i32 + %sub12.2 = sub nsw i32 %conv9.2, %conv11.2 + %arrayidx13.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 5 + %38 = load i8, ptr %arrayidx13.2, align 1 + %conv14.2 = zext i8 %38 to i32 + %arrayidx15.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 5 + %39 = load i8, ptr %arrayidx15.2, align 1 + %conv16.2 = zext i8 %39 to i32 + %sub17.2 = sub nsw i32 %conv14.2, %conv16.2 + %shl18.2 = shl nsw i32 %sub17.2, 16 + %add19.2 = add nsw i32 %shl18.2, %sub12.2 + %arrayidx20.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 2 + %40 = load i8, ptr %arrayidx20.2, align 1 + %conv21.2 = zext i8 %40 to i32 + %arrayidx22.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 2 + %41 = load i8, ptr %arrayidx22.2, align 1 + %conv23.2 = zext i8 %41 to i32 + %sub24.2 = sub nsw i32 %conv21.2, %conv23.2 + %arrayidx25.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 6 + %42 = load i8, ptr %arrayidx25.2, align 1 + %conv26.2 = zext i8 %42 to i32 + %arrayidx27.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 6 + %43 = load i8, ptr %arrayidx27.2, align 1 + %conv28.2 = zext i8 %43 to i32 + %sub29.2 = sub nsw i32 %conv26.2, %conv28.2 + %shl30.2 = shl nsw i32 %sub29.2, 16 + %add31.2 = add nsw i32 %shl30.2, %sub24.2 + %arrayidx32.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 3 + %44 = load i8, ptr %arrayidx32.2, align 1 + %conv33.2 = zext i8 %44 to i32 + %arrayidx34.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 3 + %45 = load i8, ptr %arrayidx34.2, align 1 + %conv35.2 = zext i8 %45 to i32 + %sub36.2 = 
sub nsw i32 %conv33.2, %conv35.2 + %arrayidx37.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 7 + %46 = load i8, ptr %arrayidx37.2, align 1 + %conv38.2 = zext i8 %46 to i32 + %arrayidx39.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 7 + %47 = load i8, ptr %arrayidx39.2, align 1 + %conv40.2 = zext i8 %47 to i32 + %sub41.2 = sub nsw i32 %conv38.2, %conv40.2 + %shl42.2 = shl nsw i32 %sub41.2, 16 + %add43.2 = add nsw i32 %shl42.2, %sub36.2 + %add44.2 = add nsw i32 %add19.2, %add.2 + %sub45.2 = sub nsw i32 %add.2, %add19.2 + %add46.2 = add nsw i32 %add43.2, %add31.2 + %sub47.2 = sub nsw i32 %add31.2, %add43.2 + %add48.2 = add nsw i32 %add46.2, %add44.2 + %sub51.2 = sub nsw i32 %add44.2, %add46.2 + %add55.2 = add nsw i32 %sub47.2, %sub45.2 + %sub59.2 = sub nsw i32 %sub45.2, %sub47.2 + %add.ptr.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 %idx.ext + %add.ptr64.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 %idx.ext63 + %48 = load i8, ptr %add.ptr.2, align 1 + %conv.3 = zext i8 %48 to i32 + %49 = load i8, ptr %add.ptr64.2, align 1 + %conv2.3 = zext i8 %49 to i32 + %sub.3 = sub nsw i32 %conv.3, %conv2.3 + %arrayidx3.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 4 + %50 = load i8, ptr %arrayidx3.3, align 1 + %conv4.3 = zext i8 %50 to i32 + %arrayidx5.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 4 + %51 = load i8, ptr %arrayidx5.3, align 1 + %conv6.3 = zext i8 %51 to i32 + %sub7.3 = sub nsw i32 %conv4.3, %conv6.3 + %shl.3 = shl nsw i32 %sub7.3, 16 + %add.3 = add nsw i32 %shl.3, %sub.3 + %arrayidx8.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 1 + %52 = load i8, ptr %arrayidx8.3, align 1 + %conv9.3 = zext i8 %52 to i32 + %arrayidx10.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 1 + %53 = load i8, ptr %arrayidx10.3, align 1 + %conv11.3 = zext i8 %53 to i32 + %sub12.3 = sub nsw i32 %conv9.3, %conv11.3 + %arrayidx13.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 5 + %54 = load i8, ptr %arrayidx13.3, align 1 + %conv14.3 = zext i8 %54 to i32 + %arrayidx15.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 5 + %55 = load i8, ptr %arrayidx15.3, align 1 + %conv16.3 = zext i8 %55 to i32 + %sub17.3 = sub nsw i32 %conv14.3, %conv16.3 + %shl18.3 = shl nsw i32 %sub17.3, 16 + %add19.3 = add nsw i32 %shl18.3, %sub12.3 + %arrayidx20.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 2 + %56 = load i8, ptr %arrayidx20.3, align 1 + %conv21.3 = zext i8 %56 to i32 + %arrayidx22.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 2 + %57 = load i8, ptr %arrayidx22.3, align 1 + %conv23.3 = zext i8 %57 to i32 + %sub24.3 = sub nsw i32 %conv21.3, %conv23.3 + %arrayidx25.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 6 + %58 = load i8, ptr %arrayidx25.3, align 1 + %conv26.3 = zext i8 %58 to i32 + %arrayidx27.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 6 + %59 = load i8, ptr %arrayidx27.3, align 1 + %conv28.3 = zext i8 %59 to i32 + %sub29.3 = sub nsw i32 %conv26.3, %conv28.3 + %shl30.3 = shl nsw i32 %sub29.3, 16 + %add31.3 = add nsw i32 %shl30.3, %sub24.3 + %arrayidx32.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 3 + %60 = load i8, ptr %arrayidx32.3, align 1 + %conv33.3 = zext i8 %60 to i32 + %arrayidx34.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 3 + %61 = load i8, ptr %arrayidx34.3, align 1 + %conv35.3 = zext i8 %61 to i32 + %sub36.3 = sub nsw i32 %conv33.3, %conv35.3 + %arrayidx37.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 7 + %62 = load i8, ptr %arrayidx37.3, align 1 + 
%conv38.3 = zext i8 %62 to i32 + %arrayidx39.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 7 + %63 = load i8, ptr %arrayidx39.3, align 1 + %conv40.3 = zext i8 %63 to i32 + %sub41.3 = sub nsw i32 %conv38.3, %conv40.3 + %shl42.3 = shl nsw i32 %sub41.3, 16 + %add43.3 = add nsw i32 %shl42.3, %sub36.3 + %add44.3 = add nsw i32 %add19.3, %add.3 + %sub45.3 = sub nsw i32 %add.3, %add19.3 + %add46.3 = add nsw i32 %add43.3, %add31.3 + %sub47.3 = sub nsw i32 %add31.3, %add43.3 + %add48.3 = add nsw i32 %add46.3, %add44.3 + %sub51.3 = sub nsw i32 %add44.3, %add46.3 + %add55.3 = add nsw i32 %sub47.3, %sub45.3 + %sub59.3 = sub nsw i32 %sub45.3, %sub47.3 + %add78 = add nsw i32 %add48.1, %add48 + %sub86 = sub nsw i32 %add48, %add48.1 + %add94 = add nsw i32 %add48.3, %add48.2 + %sub102 = sub nsw i32 %add48.2, %add48.3 + %add103 = add nsw i32 %add94, %add78 + %sub104 = sub nsw i32 %add78, %add94 + %add105 = add nsw i32 %sub102, %sub86 + %sub106 = sub nsw i32 %sub86, %sub102 + %shr.i = lshr i32 %add103, 15 + %and.i = and i32 %shr.i, 65537 + %mul.i = mul nuw i32 %and.i, 65535 + %add.i = add i32 %mul.i, %add103 + %xor.i = xor i32 %add.i, %mul.i + %shr.i169 = lshr i32 %add105, 15 + %and.i170 = and i32 %shr.i169, 65537 + %mul.i171 = mul nuw i32 %and.i170, 65535 + %add.i172 = add i32 %mul.i171, %add105 + %xor.i173 = xor i32 %add.i172, %mul.i171 + %shr.i174 = lshr i32 %sub104, 15 + %and.i175 = and i32 %shr.i174, 65537 + %mul.i176 = mul nuw i32 %and.i175, 65535 + %add.i177 = add i32 %mul.i176, %sub104 + %xor.i178 = xor i32 %add.i177, %mul.i176 + %shr.i179 = lshr i32 %sub106, 15 + %and.i180 = and i32 %shr.i179, 65537 + %mul.i181 = mul nuw i32 %and.i180, 65535 + %add.i182 = add i32 %mul.i181, %sub106 + %xor.i183 = xor i32 %add.i182, %mul.i181 + %add110 = add i32 %xor.i173, %xor.i + %add112 = add i32 %add110, %xor.i178 + %add113 = add i32 %add112, %xor.i183 + %add78.1 = add nsw i32 %add55.1, %add55 + %sub86.1 = sub nsw i32 %add55, %add55.1 + %add94.1 = add nsw i32 %add55.3, %add55.2 + %sub102.1 = sub nsw i32 %add55.2, %add55.3 + %add103.1 = add nsw i32 %add94.1, %add78.1 + %sub104.1 = sub nsw i32 %add78.1, %add94.1 + %add105.1 = add nsw i32 %sub102.1, %sub86.1 + %sub106.1 = sub nsw i32 %sub86.1, %sub102.1 + %shr.i.1 = lshr i32 %add103.1, 15 + %and.i.1 = and i32 %shr.i.1, 65537 + %mul.i.1 = mul nuw i32 %and.i.1, 65535 + %add.i.1 = add i32 %mul.i.1, %add103.1 + %xor.i.1 = xor i32 %add.i.1, %mul.i.1 + %shr.i169.1 = lshr i32 %add105.1, 15 + %and.i170.1 = and i32 %shr.i169.1, 65537 + %mul.i171.1 = mul nuw i32 %and.i170.1, 65535 + %add.i172.1 = add i32 %mul.i171.1, %add105.1 + %xor.i173.1 = xor i32 %add.i172.1, %mul.i171.1 + %shr.i174.1 = lshr i32 %sub104.1, 15 + %and.i175.1 = and i32 %shr.i174.1, 65537 + %mul.i176.1 = mul nuw i32 %and.i175.1, 65535 + %add.i177.1 = add i32 %mul.i176.1, %sub104.1 + %xor.i178.1 = xor i32 %add.i177.1, %mul.i176.1 + %shr.i179.1 = lshr i32 %sub106.1, 15 + %and.i180.1 = and i32 %shr.i179.1, 65537 + %mul.i181.1 = mul nuw i32 %and.i180.1, 65535 + %add.i182.1 = add i32 %mul.i181.1, %sub106.1 + %xor.i183.1 = xor i32 %add.i182.1, %mul.i181.1 + %add108.1 = add i32 %xor.i173.1, %add113 + %add110.1 = add i32 %add108.1, %xor.i.1 + %add112.1 = add i32 %add110.1, %xor.i178.1 + %add113.1 = add i32 %add112.1, %xor.i183.1 + %add78.2 = add nsw i32 %sub51.1, %sub51 + %sub86.2 = sub nsw i32 %sub51, %sub51.1 + %add94.2 = add nsw i32 %sub51.3, %sub51.2 + %sub102.2 = sub nsw i32 %sub51.2, %sub51.3 + %add103.2 = add nsw i32 %add94.2, %add78.2 + %sub104.2 = sub nsw i32 %add78.2, %add94.2 + %add105.2 = add nsw 
i32 %sub102.2, %sub86.2 + %sub106.2 = sub nsw i32 %sub86.2, %sub102.2 + %shr.i.2 = lshr i32 %add103.2, 15 + %and.i.2 = and i32 %shr.i.2, 65537 + %mul.i.2 = mul nuw i32 %and.i.2, 65535 + %add.i.2 = add i32 %mul.i.2, %add103.2 + %xor.i.2 = xor i32 %add.i.2, %mul.i.2 + %shr.i169.2 = lshr i32 %add105.2, 15 + %and.i170.2 = and i32 %shr.i169.2, 65537 + %mul.i171.2 = mul nuw i32 %and.i170.2, 65535 + %add.i172.2 = add i32 %mul.i171.2, %add105.2 + %xor.i173.2 = xor i32 %add.i172.2, %mul.i171.2 + %shr.i174.2 = lshr i32 %sub104.2, 15 + %and.i175.2 = and i32 %shr.i174.2, 65537 + %mul.i176.2 = mul nuw i32 %and.i175.2, 65535 + %add.i177.2 = add i32 %mul.i176.2, %sub104.2 + %xor.i178.2 = xor i32 %add.i177.2, %mul.i176.2 + %shr.i179.2 = lshr i32 %sub106.2, 15 + %and.i180.2 = and i32 %shr.i179.2, 65537 + %mul.i181.2 = mul nuw i32 %and.i180.2, 65535 + %add.i182.2 = add i32 %mul.i181.2, %sub106.2 + %xor.i183.2 = xor i32 %add.i182.2, %mul.i181.2 + %add108.2 = add i32 %xor.i173.2, %add113.1 + %add110.2 = add i32 %add108.2, %xor.i.2 + %add112.2 = add i32 %add110.2, %xor.i178.2 + %add113.2 = add i32 %add112.2, %xor.i183.2 + %add78.3 = add nsw i32 %sub59.1, %sub59 + %sub86.3 = sub nsw i32 %sub59, %sub59.1 + %add94.3 = add nsw i32 %sub59.3, %sub59.2 + %sub102.3 = sub nsw i32 %sub59.2, %sub59.3 + %add103.3 = add nsw i32 %add94.3, %add78.3 + %sub104.3 = sub nsw i32 %add78.3, %add94.3 + %add105.3 = add nsw i32 %sub102.3, %sub86.3 + %sub106.3 = sub nsw i32 %sub86.3, %sub102.3 + %shr.i.3 = lshr i32 %add103.3, 15 + %and.i.3 = and i32 %shr.i.3, 65537 + %mul.i.3 = mul nuw i32 %and.i.3, 65535 + %add.i.3 = add i32 %mul.i.3, %add103.3 + %xor.i.3 = xor i32 %add.i.3, %mul.i.3 + %shr.i169.3 = lshr i32 %add105.3, 15 + %and.i170.3 = and i32 %shr.i169.3, 65537 + %mul.i171.3 = mul nuw i32 %and.i170.3, 65535 + %add.i172.3 = add i32 %mul.i171.3, %add105.3 + %xor.i173.3 = xor i32 %add.i172.3, %mul.i171.3 + %shr.i174.3 = lshr i32 %sub104.3, 15 + %and.i175.3 = and i32 %shr.i174.3, 65537 + %mul.i176.3 = mul nuw i32 %and.i175.3, 65535 + %add.i177.3 = add i32 %mul.i176.3, %sub104.3 + %xor.i178.3 = xor i32 %add.i177.3, %mul.i176.3 + %shr.i179.3 = lshr i32 %sub106.3, 15 + %and.i180.3 = and i32 %shr.i179.3, 65537 + %mul.i181.3 = mul nuw i32 %and.i180.3, 65535 + %add.i182.3 = add i32 %mul.i181.3, %sub106.3 + %xor.i183.3 = xor i32 %add.i182.3, %mul.i181.3 + %add108.3 = add i32 %xor.i173.3, %add113.2 + %add110.3 = add i32 %add108.3, %xor.i.3 + %add112.3 = add i32 %add110.3, %xor.i178.3 + %add113.3 = add i32 %add112.3, %xor.i183.3 + %conv118 = and i32 %add113.3, 65535 + %shr = lshr i32 %add113.3, 16 + %add119 = add nuw nsw i32 %conv118, %shr + %shr120 = lshr i32 %add119, 1 + ret i32 %shr120 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/poison-within-divisions.ll b/llvm/test/Transforms/SLPVectorizer/X86/poison-within-divisions.ll new file mode 100644 index 0000000..76ef396 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/poison-within-divisions.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(i1 %tobool2.not, i64 %conv21) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i1 [[TOBOOL2_NOT:%.*]], i64 [[CONV21:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[Q24_659:%.*]] = phi i32 [ [[Q24_655:%.*]], %[[IF_END35:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[L15_1:%.*]] = phi i32 [ 
[[L15_4:%.*]], %[[IF_END35]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[IF_END4:.*]], label %[[Q:.*]] +; CHECK: [[IF_END4]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[Q24_659]], 0 +; CHECK-NEXT: br label %[[AB:.*]] +; CHECK: [[AB]]: +; CHECK-NEXT: [[Q24_658:%.*]] = phi i32 [ [[Q24_660:%.*]], %[[IF_END35]] ], [ 0, %[[IF_END4]] ] +; CHECK-NEXT: [[M_1:%.*]] = phi i1 [ false, %[[IF_END35]] ], [ [[TMP0]], %[[IF_END4]] ] +; CHECK-NEXT: [[O_2:%.*]] = phi i32 [ [[O_7:%.*]], %[[IF_END35]] ], [ 0, %[[IF_END4]] ] +; CHECK-NEXT: [[Q24_2:%.*]] = phi i32 [ [[Q24_7:%.*]], %[[IF_END35]] ], [ 0, %[[IF_END4]] ] +; CHECK-NEXT: br i1 [[M_1]], label %[[AE:.*]], label %[[AC:.*]] +; CHECK: [[Q]]: +; CHECK-NEXT: [[TOBOOL16_NOT:%.*]] = icmp ne i32 [[L15_1]], 0 +; CHECK-NEXT: [[SPEC_SELECT2:%.*]] = zext i1 [[TOBOOL16_NOT]] to i32 +; CHECK-NEXT: br label %[[AE]] +; CHECK: [[AE]]: +; CHECK-NEXT: [[Q24_655]] = phi i32 [ [[Q24_658]], %[[AB]] ], [ 0, %[[Q]] ] +; CHECK-NEXT: [[M_3:%.*]] = phi i64 [ 0, %[[AB]] ], [ 1, %[[Q]] ] +; CHECK-NEXT: [[L15_4]] = phi i32 [ poison, %[[AB]] ], [ [[SPEC_SELECT2]], %[[Q]] ] +; CHECK-NEXT: [[O_4:%.*]] = phi i32 [ [[O_2]], %[[AB]] ], [ 0, %[[Q]] ] +; CHECK-NEXT: [[Q24_4:%.*]] = phi i32 [ [[Q24_2]], %[[AB]] ], [ 0, %[[Q]] ] +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[IF_END35]], label %[[IF_THEN20:.*]] +; CHECK: [[IF_THEN20]]: +; CHECK-NEXT: [[DIV22:%.*]] = udiv i64 [[M_3]], [[CONV21]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[DIV22]] to i32 +; CHECK-NEXT: [[CONV23:%.*]] = sub i32 0, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[M_3]] to i32 +; CHECK-NEXT: [[CONV25:%.*]] = xor i32 [[TMP2]], 1 +; CHECK-NEXT: br label %[[IF_END35]] +; CHECK: [[AC]]: +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL2_NOT]], i32 [[Q24_2]], i32 [[O_2]] +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] +; CHECK: [[IF_END35]]: +; CHECK-NEXT: [[Q24_660]] = phi i32 [ 0, %[[AE]] ], [ [[CONV25]], %[[IF_THEN20]] ] +; CHECK-NEXT: [[O_7]] = phi i32 [ [[O_4]], %[[AE]] ], [ [[CONV23]], %[[IF_THEN20]] ] +; CHECK-NEXT: [[Q24_7]] = phi i32 [ [[Q24_4]], %[[AE]] ], [ [[CONV25]], %[[IF_THEN20]] ] +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[WHILE_BODY]], label %[[AB]] +; +entry: + br label %while.body + +while.body: + %q24.659 = phi i32 [ %q24.655, %if.end35 ], [ 0, %entry ] + %l15.1 = phi i32 [ %l15.4, %if.end35 ], [ 0, %entry ] + br i1 %tobool2.not, label %if.end4, label %q + +if.end4: + %0 = icmp eq i32 %q24.659, 0 + br label %ab + +ab: + %q24.658 = phi i32 [ %q24.660, %if.end35 ], [ 0, %if.end4 ] + %m.1 = phi i1 [ false, %if.end35 ], [ %0, %if.end4 ] + %o.2 = phi i32 [ %o.7, %if.end35 ], [ 0, %if.end4 ] + %q24.2 = phi i32 [ %q24.7, %if.end35 ], [ 0, %if.end4 ] + br i1 %m.1, label %ae, label %ac + +q: + %tobool16.not = icmp ne i32 %l15.1, 0 + %spec.select2 = zext i1 %tobool16.not to i32 + br label %ae + +ae: + %q24.655 = phi i32 [ %q24.658, %ab ], [ 0, %q ] + %m.3 = phi i64 [ 0, %ab ], [ 1, %q ] + %l15.4 = phi i32 [ poison, %ab ], [ %spec.select2, %q ] + %o.4 = phi i32 [ %o.2, %ab ], [ 0, %q ] + %q24.4 = phi i32 [ %q24.2, %ab ], [ 0, %q ] + br i1 %tobool2.not, label %if.end35, label %if.then20 + +if.then20: + %div22 = udiv i64 %m.3, %conv21 + %1 = trunc i64 %div22 to i32 + %conv23 = sub i32 0, %1 + %2 = trunc i64 %m.3 to i32 + %conv25 = xor i32 %2, 1 + br label %if.end35 + +ac: + %spec.select = select i1 %tobool2.not, i32 %q24.2, i32 %o.2 + ret i32 %spec.select + +if.end35: + %q24.660 = phi i32 [ 0, %ae ], [ %conv25, %if.then20 ] + %o.7 = phi i32 [ %o.4, %ae ], [ %conv23, %if.then20 ] + 
%q24.7 = phi i32 [ %q24.4, %ae ], [ %conv25, %if.then20 ] + br i1 %tobool2.not, label %while.body, label %ab +} diff --git a/llvm/unittests/BinaryFormat/DwarfTest.cpp b/llvm/unittests/BinaryFormat/DwarfTest.cpp index f4519f6..ba7d591 100644 --- a/llvm/unittests/BinaryFormat/DwarfTest.cpp +++ b/llvm/unittests/BinaryFormat/DwarfTest.cpp @@ -255,41 +255,186 @@ TEST(DwarfTest, lname_SourceLanguageNameString) { #include "llvm/BinaryFormat/Dwarf.def" } -TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) { - // Tests for the llvm::dwarf::LanguageDescription API that - // takes a name *and* a version. - - // Unknown language. - EXPECT_EQ( - llvm::dwarf::LanguageDescription(static_cast<SourceLanguageName>(0)), - "Unknown"); - - EXPECT_EQ( - llvm::dwarf::LanguageDescription(static_cast<SourceLanguageName>(0), 0), - "Unknown"); - - // Test that specifying an invalid version falls back to a valid language name - // regardless. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_ObjC, 0), "Objective C"); - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_Julia, 0), "Julia"); - - // Check some versions. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711), - "C++98"); - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402), - "C++14"); - - // Versions round up. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400), - "C++14"); - - // Version 0 for C and C++ is an unversioned name. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C, 0), "C (K&R and ISO)"); - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 0), - "ISO C++"); - - // Version 0 for other versioned languages may not be the unversioned name. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_Fortran, 0), - "FORTRAN 77"); +struct LanguageDescriptionTestCase { + llvm::dwarf::SourceLanguageName LName; + uint32_t LVersion; + llvm::StringRef ExpectedDescription; +}; + +LanguageDescriptionTestCase LanguageDescriptionTestCases[] = { + {static_cast<SourceLanguageName>(0), 0, "Unknown"}, + {static_cast<SourceLanguageName>(0), 1, "Unknown"}, + {DW_LNAME_Ada, 0, "Ada 83"}, + {DW_LNAME_Ada, 1982, "Ada 83"}, + {DW_LNAME_Ada, 1983, "Ada 83"}, + {DW_LNAME_Ada, 1994, "Ada 95"}, + {DW_LNAME_Ada, 1995, "Ada 95"}, + {DW_LNAME_Ada, 2004, "Ada 2005"}, + {DW_LNAME_Ada, 2005, "Ada 2005"}, + {DW_LNAME_Ada, 2011, "Ada 2012"}, + {DW_LNAME_Ada, 2012, "Ada 2012"}, + {DW_LNAME_Ada, 2013, "ISO Ada"}, + {DW_LNAME_Cobol, 0, "COBOL-74"}, + {DW_LNAME_Cobol, 1973, "COBOL-74"}, + {DW_LNAME_Cobol, 1974, "COBOL-74"}, + {DW_LNAME_Cobol, 1984, "COBOL-85"}, + {DW_LNAME_Cobol, 1985, "COBOL-85"}, + {DW_LNAME_Cobol, 1986, "ISO Cobol"}, + {DW_LNAME_Fortran, 0, "FORTRAN 77"}, + {DW_LNAME_Fortran, 1976, "FORTRAN 77"}, + {DW_LNAME_Fortran, 1977, "FORTRAN 77"}, + {DW_LNAME_Fortran, 1989, "FORTRAN 90"}, + {DW_LNAME_Fortran, 1990, "FORTRAN 90"}, + {DW_LNAME_Fortran, 1994, "Fortran 95"}, + {DW_LNAME_Fortran, 1995, "Fortran 95"}, + {DW_LNAME_Fortran, 2002, "Fortran 2003"}, + {DW_LNAME_Fortran, 2003, "Fortran 2003"}, + {DW_LNAME_Fortran, 2007, "Fortran 2008"}, + {DW_LNAME_Fortran, 2008, "Fortran 2008"}, + {DW_LNAME_Fortran, 2017, "Fortran 2018"}, + {DW_LNAME_Fortran, 2018, "Fortran 2018"}, + {DW_LNAME_Fortran, 2019, "ISO Fortran"}, + {DW_LNAME_C, 0, "C (K&R and ISO)"}, + {DW_LNAME_C, 198911, "C89"}, + {DW_LNAME_C, 198912, "C89"}, + {DW_LNAME_C, 199901, "C99"}, + {DW_LNAME_C, 199902, "C11"}, + {DW_LNAME_C, 201111, "C11"}, + {DW_LNAME_C, 201112, "C11"}, + {DW_LNAME_C, 201201, "C17"}, + {DW_LNAME_C, 
201709, "C17"}, + {DW_LNAME_C, 201710, "C17"}, + {DW_LNAME_C, 201711, "C (K&R and ISO)"}, + {DW_LNAME_C_plus_plus, 0, "ISO C++"}, + {DW_LNAME_C_plus_plus, 199710, "C++98"}, + {DW_LNAME_C_plus_plus, 199711, "C++98"}, + {DW_LNAME_C_plus_plus, 199712, "C++03"}, + {DW_LNAME_C_plus_plus, 200310, "C++03"}, + {DW_LNAME_C_plus_plus, 200311, "C++11"}, + {DW_LNAME_C_plus_plus, 201102, "C++11"}, + {DW_LNAME_C_plus_plus, 201103, "C++11"}, + {DW_LNAME_C_plus_plus, 201104, "C++14"}, + {DW_LNAME_C_plus_plus, 201401, "C++14"}, + {DW_LNAME_C_plus_plus, 201402, "C++14"}, + {DW_LNAME_C_plus_plus, 201403, "C++17"}, + {DW_LNAME_C_plus_plus, 201702, "C++17"}, + {DW_LNAME_C_plus_plus, 201703, "C++17"}, + {DW_LNAME_C_plus_plus, 201704, "C++20"}, + {DW_LNAME_C_plus_plus, 202001, "C++20"}, + {DW_LNAME_C_plus_plus, 202002, "C++20"}, + {DW_LNAME_C_plus_plus, 202003, "ISO C++"}, + {DW_LNAME_ObjC_plus_plus, 0, LanguageDescription(DW_LNAME_ObjC_plus_plus)}, + {DW_LNAME_ObjC_plus_plus, 1, LanguageDescription(DW_LNAME_ObjC_plus_plus)}, + {DW_LNAME_ObjC, 0, LanguageDescription(DW_LNAME_ObjC)}, + {DW_LNAME_ObjC, 1, LanguageDescription(DW_LNAME_ObjC)}, + {DW_LNAME_Move, 0, LanguageDescription(DW_LNAME_Move)}, + {DW_LNAME_Move, 1, LanguageDescription(DW_LNAME_Move)}, + {DW_LNAME_SYCL, 0, LanguageDescription(DW_LNAME_SYCL)}, + {DW_LNAME_SYCL, 1, LanguageDescription(DW_LNAME_SYCL)}, + {DW_LNAME_BLISS, 0, LanguageDescription(DW_LNAME_BLISS)}, + {DW_LNAME_BLISS, 1, LanguageDescription(DW_LNAME_BLISS)}, + {DW_LNAME_Crystal, 0, LanguageDescription(DW_LNAME_Crystal)}, + {DW_LNAME_Crystal, 1, LanguageDescription(DW_LNAME_Crystal)}, + {DW_LNAME_D, 0, LanguageDescription(DW_LNAME_D)}, + {DW_LNAME_D, 1, LanguageDescription(DW_LNAME_D)}, + {DW_LNAME_Dylan, 0, LanguageDescription(DW_LNAME_Dylan)}, + {DW_LNAME_Dylan, 1, LanguageDescription(DW_LNAME_Dylan)}, + {DW_LNAME_Go, 0, LanguageDescription(DW_LNAME_Go)}, + {DW_LNAME_Go, 1, LanguageDescription(DW_LNAME_Go)}, + {DW_LNAME_Haskell, 0, LanguageDescription(DW_LNAME_Haskell)}, + {DW_LNAME_Haskell, 1, LanguageDescription(DW_LNAME_Haskell)}, + {DW_LNAME_HLSL, 0, LanguageDescription(DW_LNAME_HLSL)}, + {DW_LNAME_HLSL, 1, LanguageDescription(DW_LNAME_HLSL)}, + {DW_LNAME_Java, 0, LanguageDescription(DW_LNAME_Java)}, + {DW_LNAME_Java, 1, LanguageDescription(DW_LNAME_Java)}, + {DW_LNAME_Julia, 0, LanguageDescription(DW_LNAME_Julia)}, + {DW_LNAME_Julia, 1, LanguageDescription(DW_LNAME_Julia)}, + {DW_LNAME_Kotlin, 0, LanguageDescription(DW_LNAME_Kotlin)}, + {DW_LNAME_Kotlin, 1, LanguageDescription(DW_LNAME_Kotlin)}, + {DW_LNAME_Modula2, 0, LanguageDescription(DW_LNAME_Modula2)}, + {DW_LNAME_Modula2, 1, LanguageDescription(DW_LNAME_Modula2)}, + {DW_LNAME_Modula3, 0, LanguageDescription(DW_LNAME_Modula3)}, + {DW_LNAME_Modula3, 1, LanguageDescription(DW_LNAME_Modula3)}, + {DW_LNAME_OCaml, 0, LanguageDescription(DW_LNAME_OCaml)}, + {DW_LNAME_OCaml, 1, LanguageDescription(DW_LNAME_OCaml)}, + {DW_LNAME_OpenCL_C, 0, LanguageDescription(DW_LNAME_OpenCL_C)}, + {DW_LNAME_OpenCL_C, 1, LanguageDescription(DW_LNAME_OpenCL_C)}, + {DW_LNAME_Pascal, 0, LanguageDescription(DW_LNAME_Pascal)}, + {DW_LNAME_Pascal, 1, LanguageDescription(DW_LNAME_Pascal)}, + {DW_LNAME_PLI, 0, LanguageDescription(DW_LNAME_PLI)}, + {DW_LNAME_PLI, 1, LanguageDescription(DW_LNAME_PLI)}, + {DW_LNAME_Python, 0, LanguageDescription(DW_LNAME_Python)}, + {DW_LNAME_Python, 1, LanguageDescription(DW_LNAME_Python)}, + {DW_LNAME_RenderScript, 0, LanguageDescription(DW_LNAME_RenderScript)}, + {DW_LNAME_RenderScript, 1, 
LanguageDescription(DW_LNAME_RenderScript)}, + {DW_LNAME_Rust, 0, LanguageDescription(DW_LNAME_Rust)}, + {DW_LNAME_Rust, 1, LanguageDescription(DW_LNAME_Rust)}, + {DW_LNAME_Swift, 0, LanguageDescription(DW_LNAME_Swift)}, + {DW_LNAME_Swift, 1, LanguageDescription(DW_LNAME_Swift)}, + {DW_LNAME_UPC, 0, LanguageDescription(DW_LNAME_UPC)}, + {DW_LNAME_UPC, 1, LanguageDescription(DW_LNAME_UPC)}, + {DW_LNAME_Zig, 0, LanguageDescription(DW_LNAME_Zig)}, + {DW_LNAME_Zig, 1, LanguageDescription(DW_LNAME_Zig)}, + {DW_LNAME_Assembly, 0, LanguageDescription(DW_LNAME_Assembly)}, + {DW_LNAME_Assembly, 1, LanguageDescription(DW_LNAME_Assembly)}, + {DW_LNAME_C_sharp, 0, LanguageDescription(DW_LNAME_C_sharp)}, + {DW_LNAME_C_sharp, 1, LanguageDescription(DW_LNAME_C_sharp)}, + {DW_LNAME_Mojo, 0, LanguageDescription(DW_LNAME_Mojo)}, + {DW_LNAME_Mojo, 1, LanguageDescription(DW_LNAME_Mojo)}, + {DW_LNAME_GLSL, 0, LanguageDescription(DW_LNAME_GLSL)}, + {DW_LNAME_GLSL, 1, LanguageDescription(DW_LNAME_GLSL)}, + {DW_LNAME_GLSL_ES, 0, LanguageDescription(DW_LNAME_GLSL_ES)}, + {DW_LNAME_GLSL_ES, 1, LanguageDescription(DW_LNAME_GLSL_ES)}, + {DW_LNAME_OpenCL_CPP, 0, LanguageDescription(DW_LNAME_OpenCL_CPP)}, + {DW_LNAME_OpenCL_CPP, 1, LanguageDescription(DW_LNAME_OpenCL_CPP)}, + {DW_LNAME_CPP_for_OpenCL, 0, LanguageDescription(DW_LNAME_CPP_for_OpenCL)}, + {DW_LNAME_CPP_for_OpenCL, 1, LanguageDescription(DW_LNAME_CPP_for_OpenCL)}, + {DW_LNAME_Ruby, 0, LanguageDescription(DW_LNAME_Ruby)}, + {DW_LNAME_Ruby, 1, LanguageDescription(DW_LNAME_Ruby)}, + {DW_LNAME_Hylo, 0, LanguageDescription(DW_LNAME_Hylo)}, + {DW_LNAME_Hylo, 1, LanguageDescription(DW_LNAME_Hylo)}, + {DW_LNAME_Metal, 0, LanguageDescription(DW_LNAME_Metal)}, + {DW_LNAME_Metal, 1, LanguageDescription(DW_LNAME_Metal)}}; + +struct LanguageDescriptionTestFixture + : public testing::Test, + public testing::WithParamInterface<LanguageDescriptionTestCase> {}; + +TEST_P(LanguageDescriptionTestFixture, TestLanguageDescription) { + auto [LName, LVersion, ExpectedDescription] = GetParam(); + + // Basic test. + EXPECT_EQ(llvm::dwarf::LanguageDescription(LName, LVersion), + ExpectedDescription); + + // Now do the same test but roundtrip through the DW_LANG_ <-> DW_LNAME_ + // conversion APIs first. + + auto DWLang = llvm::dwarf::toDW_LANG(LName, LVersion); + // Some languages are not 1-to-1 mapped. In which case there's nothing else + // to test. + if (!DWLang) + return; + + std::optional<std::pair<SourceLanguageName, uint32_t>> DWLName = + llvm::dwarf::toDW_LNAME(*DWLang); + + // We are roundtripping, so there definitely should be a mapping back to + // DW_LNAME_. + ASSERT_TRUE(DWLName); + + // There is no official DW_LANG_ code for C++98. So the roundtripping turns it + // into a plain DW_LANG_C_plus_plus. 
+ if (DWLang == DW_LANG_C_plus_plus && LVersion <= 199711) + EXPECT_EQ(llvm::dwarf::LanguageDescription(DWLName->first, DWLName->second), + "ISO C++"); + else + EXPECT_EQ(llvm::dwarf::LanguageDescription(DWLName->first, DWLName->second), + ExpectedDescription); } + +INSTANTIATE_TEST_SUITE_P(LanguageDescriptionTests, + LanguageDescriptionTestFixture, + ::testing::ValuesIn(LanguageDescriptionTestCases)); + } // end namespace diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index 1436f0f..5bc516d 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -802,4 +802,20 @@ TEST_F(ConstantFPRangeTest, negate) { ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0))); } +TEST_F(ConstantFPRangeTest, getWithout) { + EXPECT_EQ(Full.getWithoutNaN(), ConstantFPRange::getNonNaN(Sem)); + EXPECT_EQ(NaN.getWithoutNaN(), Empty); + + EXPECT_EQ(NaN.getWithoutInf(), NaN); + EXPECT_EQ(PosInf.getWithoutInf(), Empty); + EXPECT_EQ(NegInf.getWithoutInf(), Empty); + EXPECT_EQ(ConstantFPRange::getNonNaN(Sem).getWithoutInf(), Finite); + EXPECT_EQ(Zero.getWithoutInf(), Zero); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), + APFloat(3.0)) + .getWithoutInf(), + ConstantFPRange::getNonNaN( + APFloat::getLargest(Sem, /*Negative=*/true), APFloat(3.0))); +} + } // anonymous namespace diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn index eb8aef2..5f9eb9a 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn @@ -27,6 +27,8 @@ static_library("Analysis") { "FixitUtil.cpp", "IntervalPartition.cpp", "IssueHash.cpp", + "LifetimeAnnotations.cpp", + "LifetimeSafety.cpp", "LiveVariables.cpp", "MacroExpansionContext.cpp", "ObjCNoReturn.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/LifetimeSafety/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/LifetimeSafety/BUILD.gn deleted file mode 100644 index a148e78..0000000 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/LifetimeSafety/BUILD.gn +++ /dev/null @@ -1,16 +0,0 @@ -static_library("LifetimeSafety") { - output_name = "clangAnalysisLifetimeSafety" - configs += [ "//llvm/utils/gn/build:clang_code" ] - deps = [ "//clang/lib/Basic" ] - sources = [ - "Checker.cpp", - "Facts.cpp", - "FactsGenerator.cpp", - "LifetimeAnnotations.cpp", - "LifetimeSafety.cpp", - "LiveOrigins.cpp", - "Loans.cpp", - "LoanPropagation.cpp", - "Origins.cpp", - ] -} diff --git a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn index 14f44c4..96ff481 100644 --- a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn @@ -29,7 +29,6 @@ static_library("Sema") { "//clang/lib/APINotes", "//clang/lib/AST", "//clang/lib/Analysis", - "//clang/lib/Analysis/LifetimeSafety", "//clang/lib/Basic", "//clang/lib/Edit", "//clang/lib/Lex", diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 8c41466..f101624 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -10,6 +10,7 @@ CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll CodeGen/AArch64/selectopt-cast.ll CodeGen/AArch64/selectopt.ll +CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll 
CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll diff --git a/mlir/docs/Dialects/Vector.md b/mlir/docs/Dialects/Vector.md index 6c8949d..839dc75 100644 --- a/mlir/docs/Dialects/Vector.md +++ b/mlir/docs/Dialects/Vector.md @@ -125,7 +125,7 @@ Some existing Arith and Vector Dialect on `n-D` `vector` types comprise: // Produces a vector<3x7x8xf32> %b = arith.mulf %0, %1 : vector<3x7x8xf32> // Produces a vector<3x7x8xf32> -%c = vector.splat %1 : vector<3x7x8xf32> +%c = vector.broadcast %1 : f32 to vector<3x7x8xf32> %d = vector.extract %0[1]: vector<7x8xf32> from vector<3x7x8xf32> %e = vector.extract %0[1, 5]: vector<8xf32> from vector<3x7x8xf32> @@ -176,8 +176,6 @@ infrastructure can apply iteratively. ### Virtual Vector to Hardware Vector Lowering For now, `VV -> HWV` are specified in C++ (see for instance the -[SplatOpLowering for n-D vectors](https://github.com/tensorflow/mlir/commit/0a0c4867c6a6fcb0a2f17ef26a791c1d551fe33d) -or the [VectorOuterProductOp lowering](https://github.com/tensorflow/mlir/commit/957b1ca9680b4aacabb3a480fbc4ebd2506334b8)). Simple diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index bb5d686..6e79085 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -2881,53 +2881,6 @@ def Vector_PrintOp : }]; } -//===----------------------------------------------------------------------===// -// SplatOp -//===----------------------------------------------------------------------===// - -def Vector_SplatOp : Vector_Op<"splat", [ - Pure, - DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, - TypesMatchWith<"operand type matches element type of result", - "aggregate", "input", - "::llvm::cast<VectorType>($_self).getElementType()"> - ]> { - let summary = "vector splat or broadcast operation"; - let description = [{ - Note: This operation is deprecated. Please use vector.broadcast. - - Broadcast the operand to all elements of the result vector. The type of the - operand must match the element type of the vector type. - - Example: - - ```mlir - %s = arith.constant 10.1 : f32 - %t = vector.splat %s : vector<8x16xf32> - ``` - - This operation is deprecated, the preferred representation of the above is: - - ```mlir - %s = arith.constant 10.1 : f32 - %t = vector.broadcast %s : f32 to vector<8x16xf32> - ``` - }]; - - let arguments = (ins AnyType:$input); - let results = (outs AnyVectorOfAnyRank:$aggregate); - - let builders = [ - OpBuilder<(ins "Value":$element, "Type":$aggregateType), - [{ build($_builder, $_state, aggregateType, element); }]>]; - let assemblyFormat = "$input attr-dict `:` type($aggregate)"; - - let hasFolder = 1; - - // vector.splat is deprecated, and vector.broadcast should be used instead. - // Canonicalize vector.splat to vector.broadcast. - let hasCanonicalizer = 1; -} //===----------------------------------------------------------------------===// // VectorScaleOp diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h index b74c15e..a480195 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h @@ -64,6 +64,10 @@ void populateXeGPUFoldAliasOpsPatterns(RewritePatternSet &patterns); /// Appends patterns for XeGPU SIMT distribution into `patterns`. 
void populateXeGPUSubgroupDistributePatterns(RewritePatternSet &patterns); +/// Appends patterns for moving function body into gpu.warp_execute_on_lane0 op. +void populateXeGPUMoveFuncBodyToWarpOpPatterns(RewritePatternSet &patterns); +/// Appends patterns for XeGPU workgroup to subgroup distribution into +/// `patterns`. void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns); /// Collect a set of patterns to unroll xegpu operations to a smaller shapes. diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index dcbaa56..247dba1 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -432,10 +432,6 @@ static Value getOriginalVectorValue(Value value) { current = op.getSource(); return false; }) - .Case<vector::SplatOp>([&current](auto op) { - current = op.getInput(); - return false; - }) .Default([](Operation *) { return false; }); if (!skipOp) { diff --git a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp index bad53c0..1002ebe 100644 --- a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp +++ b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp @@ -236,7 +236,7 @@ struct TileLoadOpConversion : public OpRewritePattern<arm_sme::TileLoadOp> { /// AFTER: /// ```mlir /// ... -/// %pad_1d = vector.splat %pad : vector<[4]xi32> +/// %pad_1d = vector.broadcast %pad : i32 to vector<[4]xi32> /// %tile = scf.for %tile_slice_idx = %c0 to %svl_s step %c1 /// iter_args(%iter_tile = %init_tile) -> (vector<[4]x[4]xi32>) { /// ... diff --git a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp index 363685a..778c616 100644 --- a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp +++ b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp @@ -731,28 +731,14 @@ struct ExtractFromCreateMaskToPselLowering } }; -// Convert all `vector.splat` to `vector.broadcast`. There is a path from -// `vector.broadcast` to ArmSME via another pattern.
-struct ConvertSplatToBroadcast : public OpRewritePattern<vector::SplatOp> { - using Base::Base; - - LogicalResult matchAndRewrite(vector::SplatOp splatOp, - PatternRewriter &rewriter) const final { - - rewriter.replaceOpWithNewOp<vector::BroadcastOp>(splatOp, splatOp.getType(), - splatOp.getInput()); - return success(); - } -}; - } // namespace void mlir::populateVectorToArmSMEPatterns(RewritePatternSet &patterns, MLIRContext &ctx) { - patterns.add<BroadcastOpToArmSMELowering, ConvertSplatToBroadcast, - TransferReadToArmSMELowering, TransferWriteToArmSMELowering, - TransposeOpToArmSMELowering, VectorLoadToArmSMELowering, - VectorStoreToArmSMELowering, VectorOuterProductToArmSMELowering, + patterns.add<BroadcastOpToArmSMELowering, TransferReadToArmSMELowering, + TransferWriteToArmSMELowering, TransposeOpToArmSMELowering, + VectorLoadToArmSMELowering, VectorStoreToArmSMELowering, + VectorOuterProductToArmSMELowering, VectorExtractToArmSMELowering, VectorInsertToArmSMELowering, VectorPrintToArmSMELowering, FoldTransferWriteOfExtractTileSlice, ExtractFromCreateMaskToPselLowering>(&ctx); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 5461646..5355909 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -2161,19 +2161,6 @@ public: } }; -/// Convert `vector.splat` to `vector.broadcast`. There is a path to LLVM from -/// `vector.broadcast` through other patterns. -struct VectorSplatToBroadcast : public ConvertOpToLLVMPattern<vector::SplatOp> { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - LogicalResult - matchAndRewrite(vector::SplatOp splat, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp<vector::BroadcastOp>(splat, splat.getType(), - adaptor.getInput()); - return success(); - } -}; - } // namespace void mlir::vector::populateVectorRankReducingFMAPattern( @@ -2212,7 +2199,7 @@ void mlir::populateVectorToLLVMConversionPatterns( VectorInsertOpConversion, VectorPrintOpConversion, VectorTypeCastOpConversion, VectorScaleOpConversion, VectorExpandLoadOpConversion, VectorCompressStoreOpConversion, - VectorSplatToBroadcast, VectorBroadcastScalarToLowRankLowering, + VectorBroadcastScalarToLowRankLowering, VectorBroadcastScalarToNdLowering, VectorScalableInsertOpLowering, VectorScalableExtractOpLowering, MaskedReductionOpConversion, VectorInterleaveOpLowering, diff --git a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp index 311ff6f..56e8fee 100644 --- a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp +++ b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp @@ -22,7 +22,6 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Location.h" -#include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Transforms/DialectConversion.h" @@ -79,20 +78,6 @@ struct VectorShapeCast final : public OpConversionPattern<vector::ShapeCastOp> { } }; -// Convert `vector.splat` to `vector.broadcast`. There is a path from -// `vector.broadcast` to SPIRV via other patterns. 
-struct VectorSplatToBroadcast final - : public OpConversionPattern<vector::SplatOp> { - using Base::Base; - LogicalResult - matchAndRewrite(vector::SplatOp splat, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp<vector::BroadcastOp>(splat, splat.getType(), - adaptor.getInput()); - return success(); - } -}; - struct VectorBitcastConvert final : public OpConversionPattern<vector::BitCastOp> { using Base::Base; @@ -1092,10 +1077,10 @@ void mlir::populateVectorToSPIRVPatterns( VectorReductionPattern<CL_INT_MAX_MIN_OPS>, VectorReductionFloatMinMax<CL_FLOAT_MAX_MIN_OPS>, VectorReductionFloatMinMax<GL_FLOAT_MAX_MIN_OPS>, VectorShapeCast, - VectorSplatToBroadcast, VectorInsertStridedSliceOpConvert, - VectorShuffleOpConvert, VectorInterleaveOpConvert, - VectorDeinterleaveOpConvert, VectorScalarBroadcastPattern, - VectorLoadOpConverter, VectorStoreOpConverter, VectorStepOpConvert>( + VectorInsertStridedSliceOpConvert, VectorShuffleOpConvert, + VectorInterleaveOpConvert, VectorDeinterleaveOpConvert, + VectorScalarBroadcastPattern, VectorLoadOpConverter, + VectorStoreOpConverter, VectorStepOpConvert>( typeConverter, patterns.getContext(), PatternBenefit(1)); // Make sure that the more specialized dot product pattern has higher benefit diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp index f449d90..f276984 100644 --- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp +++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp @@ -715,6 +715,135 @@ class LLVMLoadStoreToOCLPattern : public OpConversionPattern<OpType> { }; //===----------------------------------------------------------------------===// +// GPU index id operations +//===----------------------------------------------------------------------===// +/* +// Launch Config ops +// dimidx - x, y, z - is fixed to i32 +// return type is set by XeVM type converter +// get_local_id +xevm::WorkitemIdXOp; +xevm::WorkitemIdYOp; +xevm::WorkitemIdZOp; +// get_local_size +xevm::WorkgroupDimXOp; +xevm::WorkgroupDimYOp; +xevm::WorkgroupDimZOp; +// get_group_id +xevm::WorkgroupIdXOp; +xevm::WorkgroupIdYOp; +xevm::WorkgroupIdZOp; +// get_num_groups +xevm::GridDimXOp; +xevm::GridDimYOp; +xevm::GridDimZOp; +// get_global_id : to be added if needed +*/ + +// Helpers to get the OpenCL function name and dimension argument for each op. 
+static std::pair<StringRef, int64_t> getConfig(xevm::WorkitemIdXOp) { + return {"get_local_id", 0}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkitemIdYOp) { + return {"get_local_id", 1}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkitemIdZOp) { + return {"get_local_id", 2}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupDimXOp) { + return {"get_local_size", 0}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupDimYOp) { + return {"get_local_size", 1}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupDimZOp) { + return {"get_local_size", 2}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupIdXOp) { + return {"get_group_id", 0}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupIdYOp) { + return {"get_group_id", 1}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupIdZOp) { + return {"get_group_id", 2}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::GridDimXOp) { + return {"get_num_groups", 0}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::GridDimYOp) { + return {"get_num_groups", 1}; +} +static std::pair<StringRef, int64_t> getConfig(xevm::GridDimZOp) { + return {"get_num_groups", 2}; +} +/// Replace `xevm.*` with an `llvm.call` to the corresponding OpenCL func with +/// a constant argument for the dimension - x, y or z. +template <typename OpType> +class LaunchConfigOpToOCLPattern : public OpConversionPattern<OpType> { + using OpConversionPattern<OpType>::OpConversionPattern; + LogicalResult + matchAndRewrite(OpType op, typename OpType::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + auto [baseName, dim] = getConfig(op); + Type dimTy = rewriter.getI32Type(); + Value dimVal = LLVM::ConstantOp::create(rewriter, loc, dimTy, + static_cast<int64_t>(dim)); + std::string func = mangle(baseName, {dimTy}, {true}); + Type resTy = op.getType(); + auto call = + createDeviceFunctionCall(rewriter, func, resTy, {dimTy}, {dimVal}, {}, + noUnwindWillReturnAttrs, op.getOperation()); + constexpr auto noModRef = LLVM::ModRefInfo::NoModRef; + auto memAttr = rewriter.getAttr<LLVM::MemoryEffectsAttr>( + /*other=*/noModRef, + /*argMem=*/noModRef, /*inaccessibleMem=*/noModRef); + call.setMemoryEffectsAttr(memAttr); + rewriter.replaceOp(op, call); + return success(); + } +}; + +/* +// Subgroup ops +// get_sub_group_local_id +xevm::LaneIdOp; +// get_sub_group_id +xevm::SubgroupIdOp; +// get_sub_group_size +xevm::SubgroupSizeOp; +// get_num_sub_groups : to be added if needed +*/ + +// Helpers to get the OpenCL function name for each op. 
+static StringRef getConfig(xevm::LaneIdOp) { return "get_sub_group_local_id"; } +static StringRef getConfig(xevm::SubgroupIdOp) { return "get_sub_group_id"; } +static StringRef getConfig(xevm::SubgroupSizeOp) { + return "get_sub_group_size"; +} +template <typename OpType> +class SubgroupOpWorkitemOpToOCLPattern : public OpConversionPattern<OpType> { + using OpConversionPattern<OpType>::OpConversionPattern; + LogicalResult + matchAndRewrite(OpType op, typename OpType::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + std::string func = mangle(getConfig(op).str(), {}); + Type resTy = op.getType(); + auto call = + createDeviceFunctionCall(rewriter, func, resTy, {}, {}, {}, + noUnwindWillReturnAttrs, op.getOperation()); + constexpr auto noModRef = LLVM::ModRefInfo::NoModRef; + auto memAttr = rewriter.getAttr<LLVM::MemoryEffectsAttr>( + /*other=*/noModRef, + /*argMem=*/noModRef, /*inaccessibleMem=*/noModRef); + call.setMemoryEffectsAttr(memAttr); + rewriter.replaceOp(op, call); + return success(); + } +}; + +//===----------------------------------------------------------------------===// // Pass Definition //===----------------------------------------------------------------------===// @@ -775,7 +904,22 @@ void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target, LLVMLoadStoreToOCLPattern<LLVM::LoadOp>, LLVMLoadStoreToOCLPattern<LLVM::StoreOp>, BlockLoadStore1DToOCLPattern<BlockLoadOp>, - BlockLoadStore1DToOCLPattern<BlockStoreOp>>( + BlockLoadStore1DToOCLPattern<BlockStoreOp>, + LaunchConfigOpToOCLPattern<WorkitemIdXOp>, + LaunchConfigOpToOCLPattern<WorkitemIdYOp>, + LaunchConfigOpToOCLPattern<WorkitemIdZOp>, + LaunchConfigOpToOCLPattern<WorkgroupDimXOp>, + LaunchConfigOpToOCLPattern<WorkgroupDimYOp>, + LaunchConfigOpToOCLPattern<WorkgroupDimZOp>, + LaunchConfigOpToOCLPattern<WorkgroupIdXOp>, + LaunchConfigOpToOCLPattern<WorkgroupIdYOp>, + LaunchConfigOpToOCLPattern<WorkgroupIdZOp>, + LaunchConfigOpToOCLPattern<GridDimXOp>, + LaunchConfigOpToOCLPattern<GridDimYOp>, + LaunchConfigOpToOCLPattern<GridDimZOp>, + SubgroupOpWorkitemOpToOCLPattern<LaneIdOp>, + SubgroupOpWorkitemOpToOCLPattern<SubgroupIdOp>, + SubgroupOpWorkitemOpToOCLPattern<SubgroupSizeOp>>( patterns.getContext()); } diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index d5c7190..f405d0c 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -26,6 +26,7 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TypeSwitch.h" @@ -40,6 +41,15 @@ using namespace mlir::amdgpu; #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.cpp.inc" +namespace { +struct AMDGPUInlinerInterface final : DialectInlinerInterface { + using DialectInlinerInterface::DialectInlinerInterface; + bool isLegalToInline(Operation *, Region *, bool, IRMapping &) const final { + return true; + } +}; +} // namespace + void AMDGPUDialect::initialize() { addOperations< #define GET_OP_LIST @@ -49,6 +59,7 @@ void AMDGPUDialect::initialize() { #define GET_ATTRDEF_LIST #include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.cpp.inc" >(); + addInterfaces<AMDGPUInlinerInterface>(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp 
b/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp index c64e10f5..d018cdd 100644 --- a/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp @@ -123,8 +123,7 @@ void mlir::arith::populateEmulateUnsupportedFloatsLegality( vector::OuterProductOp, vector::ScanOp>( [&](Operation *op) { return converter.isLegal(op); }); target.addLegalOp<arith::BitcastOp, arith::ExtFOp, arith::TruncFOp, - arith::ConstantOp, arith::SelectOp, vector::SplatOp, - vector::BroadcastOp>(); + arith::ConstantOp, arith::SelectOp, vector::BroadcastOp>(); } void EmulateUnsupportedFloatsPass::runOnOperation() { diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 14e235f..a7e3ba8 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -1665,10 +1665,10 @@ static bool hasZeroDimVectors(Operation *op) { llvm::any_of(op->getResultTypes(), hasZeroDimVectorType); } -/// All BroadcastOps and SplatOps, as well as ShapeCastOps that only prepend -/// 1s, are considered to be 'broadcastlike'. +/// All BroadcastOps, as well as ShapeCastOps that only prepend 1s, are +/// considered to be 'broadcastlike'. static bool isBroadcastLike(Operation *op) { - if (isa<BroadcastOp, SplatOp>(op)) + if (isa<BroadcastOp>(op)) return true; auto shapeCast = dyn_cast<ShapeCastOp>(op); @@ -3249,12 +3249,11 @@ struct Canonicalize0DShuffleOp : public OpRewritePattern<ShuffleOp> { }; /// Consider the defining operation `defOp` of `value`. If `defOp` is a -/// vector.splat or a vector.broadcast with a scalar operand, return the scalar -/// value that is splatted. Otherwise return null. +/// vector.broadcast with a scalar operand, return the scalar value that is +/// splatted. Otherwise return null. /// -/// Examples: +/// Example: /// -/// scalar_source --> vector.splat --> value - return scalar_source /// scalar_source --> vector.broadcast --> value - return scalar_source static Value getScalarSplatSource(Value value) { // Block argument: @@ -3262,10 +3261,6 @@ static Value getScalarSplatSource(Value value) { if (!defOp) return {}; - // Splat: - if (auto splat = dyn_cast<vector::SplatOp>(defOp)) - return splat.getInput(); - auto broadcast = dyn_cast<vector::BroadcastOp>(defOp); // Not broadcast (and not splat): @@ -7511,41 +7506,6 @@ void mlir::vector::populateVectorToVectorCanonicalizationPatterns( patterns.getContext(), benefit); } -//===----------------------------------------------------------------------===// -// SplatOp -//===----------------------------------------------------------------------===// - -OpFoldResult SplatOp::fold(FoldAdaptor adaptor) { - auto constOperand = adaptor.getInput(); - if (!isa_and_nonnull<IntegerAttr, FloatAttr>(constOperand)) - return {}; - - // SplatElementsAttr::get treats single value for second arg as being a splat. - return SplatElementsAttr::get(getType(), {constOperand}); -} - -// Canonicalizer for vector.splat. It always gets canonicalized to a -// vector.broadcast. 
-class SplatToBroadcastPattern final : public OpRewritePattern<SplatOp> { -public: - using Base::Base; - LogicalResult matchAndRewrite(SplatOp splatOp, - PatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp<vector::BroadcastOp>(splatOp, splatOp.getType(), - splatOp.getOperand()); - return success(); - } -}; -void SplatOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add<SplatToBroadcastPattern>(context); -} - -void SplatOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges, - SetIntRangeFn setResultRanges) { - setResultRanges(getResult(), argRanges.front()); -} - Value mlir::vector::makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath, diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp index 255f2bf..3a3231d 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @@ -90,7 +90,7 @@ static FailureOr<Operation *> getCompressedMaskOp(OpBuilder &rewriter, Operation *maskOp = mask.getDefiningOp(); SmallVector<vector::ExtractOp, 2> extractOps; - // TODO: add support to `vector.splat`. + // TODO: add support to `vector.broadcast`. // Finding the mask creation operation. while (maskOp && !isa<arith::ConstantOp, vector::CreateMaskOp, vector::ConstantMaskOp>( diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp index 71fba71c..1b656d8 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp @@ -590,32 +590,6 @@ struct LinearizeVectorBitCast final } }; -/// This pattern converts the SplatOp to work on a linearized vector. -/// Following, -/// vector.splat %value : vector<4x4xf32> -/// is converted to: -/// %out_1d = vector.splat %value : vector<16xf32> -/// %out_nd = vector.shape_cast %out_1d : vector<16xf32> to vector<4x4xf32> -struct LinearizeVectorSplat final - : public OpConversionPattern<vector::SplatOp> { - using Base::Base; - - LinearizeVectorSplat(const TypeConverter &typeConverter, MLIRContext *context, - PatternBenefit benefit = 1) - : OpConversionPattern(typeConverter, context, benefit) {} - - LogicalResult - matchAndRewrite(vector::SplatOp splatOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto dstTy = getTypeConverter()->convertType(splatOp.getType()); - if (!dstTy) - return rewriter.notifyMatchFailure(splatOp, "cannot convert type."); - rewriter.replaceOpWithNewOp<vector::SplatOp>(splatOp, adaptor.getInput(), - dstTy); - return success(); - } -}; - /// This pattern converts the CreateMaskOp to work on a linearized vector. /// It currently supports only 2D masks with a unit outer dimension. 
/// Following, @@ -934,9 +908,9 @@ void mlir::vector::populateVectorLinearizeBasePatterns( RewritePatternSet &patterns) { patterns .add<LinearizeConstantLike, LinearizeVectorizable, LinearizeVectorBitCast, - LinearizeVectorSplat, LinearizeVectorCreateMask, LinearizeVectorLoad, - LinearizeVectorStore, LinearizeVectorFromElements, - LinearizeVectorToElements>(typeConverter, patterns.getContext()); + LinearizeVectorCreateMask, LinearizeVectorLoad, LinearizeVectorStore, + LinearizeVectorFromElements, LinearizeVectorToElements>( + typeConverter, patterns.getContext()); } void mlir::vector::populateVectorLinearizeShuffleLikeOpsPatterns( diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index d6a6d7cd..726da1e 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -878,7 +878,7 @@ struct BubbleUpBitCastForStridedSliceInsert // This transforms IR like: // %1 = vector.bitcast %0: vector<8xf16> to vector<4xf32> // Into: -// %cst = vector.splat %c0_f32 : vector<4xf32> +// %cst = vector.broadcast %c0_f32 : f32 to vector<4xf32> // %1 = vector.extract_strided_slice %0 { // offsets = [0], sizes = [4], strides = [1] // } : vector<8xf16> to vector<4xf16> @@ -987,8 +987,8 @@ static Type cloneOrReplace(Type type, Type newElementType) { return newElementType; } -/// If `value` is the result of a splat or broadcast operation, return the input -/// of the splat/broadcast operation. +/// If `value` is the result of a broadcast operation, return the input +/// of the broadcast operation. static Value getBroadcastLikeSource(Value value) { Operation *op = value.getDefiningOp(); @@ -998,13 +998,10 @@ static Value getBroadcastLikeSource(Value value) { if (auto broadcast = dyn_cast<vector::BroadcastOp>(op)) return broadcast.getSource(); - if (auto splat = dyn_cast<vector::SplatOp>(op)) - return splat.getInput(); - return {}; } -/// Reorders elementwise(broadcast/splat) to broadcast(elementwise). Ex: +/// Reorders elementwise(broadcast) to broadcast(elementwise). Ex: /// /// Example: /// ``` @@ -1017,9 +1014,6 @@ static Value getBroadcastLikeSource(Value value) { /// %r = arith.addi %arg0, %arg1 : index /// %b = vector.broadcast %r : index to vector<1x4xindex> /// ``` -/// -/// Both `vector.broadcast` and `vector.splat` are supported as broadcasting -/// ops. struct ReorderElementwiseOpsOnBroadcast final : public OpTraitRewritePattern<OpTrait::Elementwise> { using OpTraitRewritePattern::OpTraitRewritePattern; @@ -1045,29 +1039,29 @@ struct ReorderElementwiseOpsOnBroadcast final Type resultElemType = resultType.getElementType(); // Get the type of the first non-constant operand - Value splatSource; + Value broadcastSource; for (Value operand : op->getOperands()) { Operation *definingOp = operand.getDefiningOp(); if (!definingOp) return failure(); if (definingOp->hasTrait<OpTrait::ConstantLike>()) continue; - splatSource = getBroadcastLikeSource(operand); + broadcastSource = getBroadcastLikeSource(operand); break; } - if (!splatSource) + if (!broadcastSource) return failure(); Type unbroadcastResultType = - cloneOrReplace(splatSource.getType(), resultElemType); + cloneOrReplace(broadcastSource.getType(), resultElemType); // Make sure that all operands are broadcast from identically-shaped types: - // * scalar (`vector.broadcast` + `vector.splat`), or + // * scalar (`vector.broadcast`), or // * vector (`vector.broadcast`). // Otherwise the re-ordering wouldn't be safe. 
- if (!llvm::all_of(op->getOperands(), [splatSource](Value val) { + if (!llvm::all_of(op->getOperands(), [broadcastSource](Value val) { if (auto source = getBroadcastLikeSource(val)) return haveSameShapeAndScaling(source.getType(), - splatSource.getType()); + broadcastSource.getType()); SplatElementsAttr splatConst; return matchPattern(val, m_Constant(&splatConst)); })) { @@ -1271,19 +1265,18 @@ public: } }; -/// Pattern to rewrite vector.store(vector.splat) -> vector/memref.store. +/// Pattern to rewrite vector.store(vector.broadcast) -> vector/memref.store. /// /// Example: /// ``` -/// %0 = vector.splat %arg2 : vector<1xf32> +/// %0 = vector.broadcast %arg2 : f32 to vector<1xf32> /// vector.store %0, %arg0[%arg1] : memref<?xf32>, vector<1xf32> /// ``` /// Gets converted to: /// ``` /// memref.store %arg2, %arg0[%arg1] : memref<?xf32> /// ``` -class StoreOpFromSplatOrBroadcast final - : public OpRewritePattern<vector::StoreOp> { +class StoreOpFromBroadcast final : public OpRewritePattern<vector::StoreOp> { public: using Base::Base; @@ -1308,9 +1301,9 @@ public: return rewriter.notifyMatchFailure( op, "value to store is not from a broadcast"); - // Checking for single use so we can remove splat. - Operation *splat = toStore.getDefiningOp(); - if (!splat->hasOneUse()) + // Checking for single use so we can remove broadcast. + Operation *broadcast = toStore.getDefiningOp(); + if (!broadcast->hasOneUse()) return rewriter.notifyMatchFailure(op, "expected single op use"); Value base = op.getBase(); @@ -1321,7 +1314,7 @@ public: } else { rewriter.replaceOpWithNewOp<memref::StoreOp>(op, source, base, indices); } - rewriter.eraseOp(splat); + rewriter.eraseOp(broadcast); return success(); } }; @@ -2391,8 +2384,8 @@ void mlir::vector::populateSinkVectorOpsPatterns(RewritePatternSet &patterns, void mlir::vector::populateSinkVectorMemOpsPatterns(RewritePatternSet &patterns, PatternBenefit benefit) { // TODO: Consider converting these patterns to canonicalizations. - patterns.add<ExtractOpFromLoad, StoreOpFromSplatOrBroadcast>( - patterns.getContext(), benefit); + patterns.add<ExtractOpFromLoad, StoreOpFromBroadcast>(patterns.getContext(), + benefit); } void mlir::vector::populateChainedVectorReductionFoldingPatterns( diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index f1dbc5d..26770b3 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -195,8 +195,7 @@ static bool requireTranspose(const xegpu::LayoutAttr layout, /// } /// return %0 /// } -struct MoveFuncBodyToWarpExecuteOnLane0 - : public OpRewritePattern<gpu::GPUFuncOp> { +struct MoveFuncBodyToWarpOp : public OpRewritePattern<gpu::GPUFuncOp> { using OpRewritePattern<gpu::GPUFuncOp>::OpRewritePattern; LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, PatternRewriter &rewriter) const override { @@ -1447,6 +1446,11 @@ void xegpu::populateXeGPUSubgroupDistributePatterns( /*pattern benefit=*/highPatternBenefit); } +void xegpu::populateXeGPUMoveFuncBodyToWarpOpPatterns( + RewritePatternSet &patterns) { + patterns.add<MoveFuncBodyToWarpOp>(patterns.getContext()); +} + void XeGPUSubgroupDistributePass::runOnOperation() { // Step 1: Attach layouts to op operands. // TODO: Following assumptions are made: @@ -1473,7 +1477,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() { // gpu.warp_execute_on_lane_0 operation. 
{ RewritePatternSet patterns(&getContext()); - patterns.add<MoveFuncBodyToWarpExecuteOnLane0>(&getContext()); + xegpu::populateXeGPUMoveFuncBodyToWarpOpPatterns(patterns); if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) { signalPassFailure(); diff --git a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir index eb9feaa..a75f30d 100644 --- a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir +++ b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir @@ -86,7 +86,7 @@ func.func @fma_size1_vector(%a: vector<1xf32>, %b: vector<1xf32>, %c: vector<1xf // CHECK: %[[VAL:.+]] = spirv.CompositeConstruct %[[A]], %[[A]], %[[A]], %[[A]] // CHECK: spirv.ReturnValue %[[VAL]] : vector<4xf32> func.func @splat(%f : f32) -> vector<4xf32> { - %splat = vector.splat %f : vector<4xf32> + %splat = vector.broadcast %f : f32 to vector<4xf32> return %splat : vector<4xf32> } diff --git a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir index c8a434b..1735e08 100644 --- a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir +++ b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir @@ -429,38 +429,6 @@ func.func @broadcast_vec2d_from_vec1d(%arg0: vector<[8]xi16>) { return } -//===----------------------------------------------------------------------===// -// vector.splat -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func.func @splat_vec2d_from_i32( -// CHECK-SAME: %[[SRC:.*]]: i32) { -// CHECK: %[[BCST:.*]] = vector.broadcast %[[SRC]] : i32 to vector<[4]xi32> -// CHECK: arm_sme.get_tile : vector<[4]x[4]xi32> -// CHECK: %[[VSCALE:.*]] = vector.vscale -// CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %{{.*}} : index -// CHECK: scf.for {{.*}} to %[[NUM_TILE_SLICES]] {{.*}} { -// CHECK: arm_sme.insert_tile_slice %[[BCST]], {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32> -func.func @splat_vec2d_from_i32(%arg0: i32) { - %0 = vector.splat %arg0 : vector<[4]x[4]xi32> - "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> () - return -} - -// ----- - -// CHECK-LABEL: func.func @splat_vec2d_from_f16( -// CHECK-SAME: %[[SRC:.*]]: f16) { -// CHECK: %[[BCST:.*]] = vector.broadcast %[[SRC]] : f16 to vector<[8]xf16> -// CHECK: scf.for -// CHECK: arm_sme.insert_tile_slice %[[BCST]], {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16> -func.func @splat_vec2d_from_f16(%arg0: f16) { - %0 = vector.splat %arg0 : vector<[8]x[8]xf16> - "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> () - return -} //===----------------------------------------------------------------------===// // vector.transpose diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir index 5973c2ba..cb48ca3 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir @@ -2217,23 +2217,6 @@ func.func @compress_store_op_with_alignment(%arg0: memref<?xindex>, %arg1: vecto // ----- //===----------------------------------------------------------------------===// -// vector.splat -//===----------------------------------------------------------------------===// - -// vector.splat is converted to vector.broadcast. Then, vector.broadcast is converted to LLVM. 
-// CHECK-LABEL: @splat_0d -// CHECK-NOT: splat -// CHECK: return -func.func @splat_0d(%elt: f32) -> (vector<f32>, vector<4xf32>, vector<[4]xf32>) { - %a = vector.splat %elt : vector<f32> - %b = vector.splat %elt : vector<4xf32> - %c = vector.splat %elt : vector<[4]xf32> - return %a, %b, %c : vector<f32>, vector<4xf32>, vector<[4]xf32> -} - -// ----- - -//===----------------------------------------------------------------------===// // vector.scalable_insert //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir index b31a973..72e70ff 100644 --- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir +++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir @@ -35,7 +35,7 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32 // ----- // CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt( llvm.func @blockload2d_cache_control(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi16> { - // CHECK: xevm.DecorationCacheControl = + // CHECK: xevm.DecorationCacheControl = // CHECK-SAME: 6442 : i32, 0 : i32, 1 : i32, 0 : i32 // CHECK-SAME: 6442 : i32, 1 : i32, 1 : i32, 0 : i32 %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y @@ -345,3 +345,148 @@ llvm.func @blockstore_scalar(%ptr: !llvm.ptr<3>, %data: i64) { xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, i64) llvm.return } + +// ----- +// CHECK-LABEL: llvm.func @local_id.x() -> i32 { +llvm.func @local_id.x() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR1:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[VAR0]]) + // CHECK-SAME: {function_type = !llvm.func<i32 (i32)>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z12get_local_idj", visibility_ = 0 : i64, will_return} : (i32) -> i32 + %1 = xevm.local_id.x : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @local_id.y() -> i32 { +llvm.func @local_id.y() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(1 : i32) : i32 + %1 = xevm.local_id.y : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @local_id.z() -> i32 { +llvm.func @local_id.z() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(2 : i32) : i32 + %1 = xevm.local_id.z : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @local_size.x() -> i32 { +llvm.func @local_size.x() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR1:.*]] = llvm.call spir_funccc @_Z14get_local_sizej(%[[VAR0]]) + // CHECK-SAME: {function_type = !llvm.func<i32 (i32)>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z14get_local_sizej", visibility_ = 0 : i64, will_return} : (i32) -> i32 + %1 = xevm.local_size.x : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @local_size.y() -> i32 { +llvm.func @local_size.y() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(1 : i32) : i32 + %1 = xevm.local_size.y : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func 
@local_size.z() -> i32 { +llvm.func @local_size.z() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(2 : i32) : i32 + %1 = xevm.local_size.z : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @group_id.x() -> i32 { +llvm.func @group_id.x() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR1:.*]] = llvm.call spir_funccc @_Z12get_group_idj(%[[VAR0]]) + // CHECK-SAME: {function_type = !llvm.func<i32 (i32)>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z12get_group_idj", visibility_ = 0 : i64, will_return} : (i32) -> i32 + %1 = xevm.group_id.x : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @group_id.y() -> i32 { +llvm.func @group_id.y() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(1 : i32) : i32 + %1 = xevm.group_id.y : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @group_id.z() -> i32 { +llvm.func @group_id.z() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(2 : i32) : i32 + %1 = xevm.group_id.z : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @group_count.x() -> i32 { +llvm.func @group_count.x() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR1:.*]] = llvm.call spir_funccc @_Z14get_num_groupsj(%[[VAR0]]) + // CHECK-SAME: {function_type = !llvm.func<i32 (i32)>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z14get_num_groupsj", visibility_ = 0 : i64, will_return} : (i32) -> i32 + %1 = xevm.group_count.x : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @group_count.y() -> i32 { +llvm.func @group_count.y() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(1 : i32) : i32 + %1 = xevm.group_count.y : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func @group_count.z() -> i32 { +llvm.func @group_count.z() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(2 : i32) : i32 + %1 = xevm.group_count.z : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z22get_sub_group_local_id() -> i32 attributes {no_unwind, will_return} +llvm.func @lane_id() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z22get_sub_group_local_id() + // CHECK-SAME: {function_type = !llvm.func<i32 ()>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z22get_sub_group_local_id", visibility_ = 0 : i64, will_return} : () -> i32 + %1 = xevm.lane_id : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z18get_sub_group_size() -> i32 attributes {no_unwind, will_return} +llvm.func @subgroup_size() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z18get_sub_group_size() + // CHECK-SAME: {function_type = !llvm.func<i32 ()>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z18get_sub_group_size", visibility_ = 0 : i64, will_return} : () -> i32 + %1 = xevm.subgroup_size : i32 + llvm.return %1 : i32 +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z16get_sub_group_id() -> i32 
attributes {no_unwind, will_return} +llvm.func @subgroup_id() -> i32 { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z16get_sub_group_id() + // CHECK-SAME: {function_type = !llvm.func<i32 ()>, linkage = #llvm.linkage<external>, + // CHECK-SAME: memory_effects = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>, + // CHECK-SAME: no_unwind, sym_name = "_Z16get_sub_group_id", visibility_ = 0 : i64, will_return} : () -> i32 + %1 = xevm.subgroup_id : i32 + llvm.return %1 : i32 +} diff --git a/mlir/test/Dialect/AMDGPU/inlining.mlir b/mlir/test/Dialect/AMDGPU/inlining.mlir new file mode 100644 index 0000000..33fb635 --- /dev/null +++ b/mlir/test/Dialect/AMDGPU/inlining.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt %s --inline | FileCheck %s + +func.func @calee(%arg0 : f32) -> f32 { + %0 = amdgpu.permlane_swap %arg0 32 : f32 + func.return %0 : f32 +} + +// CHECK-LABEL: func @caller +func.func @caller(%arg0 : f32) -> f32 { + // CHECK-NOT: call + // CHECK: amdgpu.permlane_swap + %0 = call @calee(%arg0) : (f32) -> f32 + func.return %0 : f32 +} diff --git a/mlir/test/Dialect/Math/canonicalize_ipowi.mlir b/mlir/test/Dialect/Math/canonicalize_ipowi.mlir index 9e65a96..6812092 100644 --- a/mlir/test/Dialect/Math/canonicalize_ipowi.mlir +++ b/mlir/test/Dialect/Math/canonicalize_ipowi.mlir @@ -105,9 +105,9 @@ func.func @ipowi32_fold(%result : memref<?xi32>) { // --- Test vector folding --- %arg11_base = arith.constant 2 : i32 - %arg11_base_vec = vector.splat %arg11_base : vector<2x2xi32> + %arg11_base_vec = vector.broadcast %arg11_base : i32 to vector<2x2xi32> %arg11_power = arith.constant 30 : i32 - %arg11_power_vec = vector.splat %arg11_power : vector<2x2xi32> + %arg11_power_vec = vector.broadcast %arg11_power : i32 to vector<2x2xi32> %res11_vec = math.ipowi %arg11_base_vec, %arg11_power_vec : vector<2x2xi32> %i11 = arith.constant 11 : index %res11 = vector.extract %res11_vec[1, 1] : i32 from vector<2x2xi32> diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index eb369c0..59774f9 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -837,7 +837,7 @@ func.func @fold_extract_splatlike(%a : f32, %idx0 : index, %idx1 : index, %idx2 // CHECK-LABEL: fold_extract_vector_from_splat // CHECK: vector.broadcast {{.*}} f32 to vector<4xf32> func.func @fold_extract_vector_from_splat(%a : f32, %idx0 : index, %idx1 : index) -> vector<4xf32> { - %b = vector.splat %a : vector<1x2x4xf32> + %b = vector.broadcast %a : f32 to vector<1x2x4xf32> %r = vector.extract %b[%idx0, %idx1] : vector<4xf32> from vector<1x2x4xf32> return %r : vector<4xf32> } diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-splat.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-splat.mlir deleted file mode 100644 index e4a9391..0000000 --- a/mlir/test/Dialect/Vector/canonicalize/vector-splat.mlir +++ /dev/null @@ -1,126 +0,0 @@ -// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s - -// This file should be removed when vector.splat is removed. -// This file tests canonicalization/folding with vector.splat. 
-// These tests all have equivalent tests using vector.broadcast in canonicalize.mlir - - -// CHECK-LABEL: fold_extract_splat -// CHECK-SAME: %[[A:.*]]: f32 -// CHECK: return %[[A]] : f32 -func.func @fold_extract_splat(%a : f32, %idx0 : index, %idx1 : index, %idx2 : index) -> f32 { - %b = vector.splat %a : vector<1x2x4xf32> - %r = vector.extract %b[%idx0, %idx1, %idx2] : f32 from vector<1x2x4xf32> - return %r : f32 -} - -// ----- - -// CHECK-LABEL: extract_strided_splat -// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} f16 to vector<2x4xf16> -// CHECK-NEXT: return %[[B]] : vector<2x4xf16> -func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> { - %0 = vector.splat %arg0 : vector<16x4xf16> - %1 = vector.extract_strided_slice %0 - {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} : - vector<16x4xf16> to vector<2x4xf16> - return %1 : vector<2x4xf16> -} - -// ----- - -// CHECK-LABEL: func @splat_fold -// CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> -// CHECK-NEXT: return [[V]] : vector<4xf32> -func.func @splat_fold() -> vector<4xf32> { - %c = arith.constant 1.0 : f32 - %v = vector.splat %c : vector<4xf32> - return %v : vector<4xf32> - -} - -// ----- - -// CHECK-LABEL: func @transpose_splat2( -// CHECK-SAME: %[[VAL_0:.*]]: f32) -> vector<3x4xf32> { -// CHECK: %[[VAL_1:.*]] = vector.broadcast %[[VAL_0]] : f32 to vector<3x4xf32> -// CHECK: return %[[VAL_1]] : vector<3x4xf32> -func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> { - %splat = vector.splat %arg : vector<4x3xf32> - %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32> - return %0 : vector<3x4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_strided_slice_splat -// CHECK-SAME: (%[[ARG:.*]]: f32) -// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : f32 to vector<8x16xf32> -// CHECK-NEXT: return %[[SPLAT]] : vector<8x16xf32> -func.func @insert_strided_slice_splat(%x: f32) -> (vector<8x16xf32>) { - %splat0 = vector.splat %x : vector<4x4xf32> - %splat1 = vector.splat %x : vector<8x16xf32> - %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]} - : vector<4x4xf32> into vector<8x16xf32> - return %0 : vector<8x16xf32> -} - -// ----- - -// CHECK-LABEL: func @shuffle_splat -// CHECK-SAME: (%[[ARG:.*]]: i32) -// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : i32 to vector<4xi32> -// CHECK-NEXT: return %[[SPLAT]] : vector<4xi32> -func.func @shuffle_splat(%x : i32) -> vector<4xi32> { - %v0 = vector.splat %x : vector<4xi32> - %v1 = vector.splat %x : vector<2xi32> - %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32> - return %shuffle : vector<4xi32> -} - - -// ----- - -// CHECK-LABEL: func @insert_splat -// CHECK-SAME: (%[[ARG:.*]]: i32) -// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : i32 to vector<2x4x3xi32> -// CHECK-NEXT: return %[[SPLAT]] : vector<2x4x3xi32> -func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> { - %v0 = vector.splat %x : vector<4x3xi32> - %v1 = vector.splat %x : vector<2x4x3xi32> - %insert = vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32> - return %insert : vector<2x4x3xi32> -} - -// ----- - -// CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression -// CHECK-SAME: (%[[A:.*]]: f32, %[[C:.*]]: vector<2xf32>) -func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %c: vector<2xf32>) -> (f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { - // Splat scalar to 0D and extract scalar. 
- %0 = vector.splat %a : vector<f32> - %1 = vector.extract %0[] : f32 from vector<f32> - - // Broadcast scalar to 0D and extract scalar. - %2 = vector.splat %a : vector<f32> - %3 = vector.extract %2[] : f32 from vector<f32> - - // Splat scalar to 2D and extract scalar. - %6 = vector.splat %a : vector<2x3xf32> - %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32> - - // Broadcast scalar to 3D and extract scalar. - %8 = vector.splat %a : vector<5x6x7xf32> - %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32> - - // Extract 2D from 3D that was broadcasted from a scalar. - // CHECK: %[[EXTRACT2:.*]] = vector.broadcast %[[A]] : f32 to vector<6x7xf32> - %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32> - - // Extract 1D from 2D that was splat'ed from a scalar. - // CHECK: %[[EXTRACT3:.*]] = vector.broadcast %[[A]] : f32 to vector<3xf32> - %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32> - - // CHECK: return %[[A]], %[[A]], %[[A]], %[[A]], %[[EXTRACT2]], %[[EXTRACT3]] - return %1, %3, %7, %9, %10, %11 : f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32> -} diff --git a/mlir/test/Dialect/Vector/int-range-interface.mlir b/mlir/test/Dialect/Vector/int-range-interface.mlir index b2f16bb..4da8d8a 100644 --- a/mlir/test/Dialect/Vector/int-range-interface.mlir +++ b/mlir/test/Dialect/Vector/int-range-interface.mlir @@ -28,7 +28,7 @@ func.func @float_constant_splat() -> vector<8xf32> { // CHECK: test.reflect_bounds {smax = 5 : index, smin = 4 : index, umax = 5 : index, umin = 4 : index} func.func @vector_splat() -> vector<4xindex> { %0 = test.with_bounds { umin = 4 : index, umax = 5 : index, smin = 4 : index, smax = 5 : index } : index - %1 = vector.splat %0 : vector<4xindex> + %1 = vector.broadcast %0 : index to vector<4xindex> %2 = test.reflect_bounds %1 : vector<4xindex> func.return %2 : vector<4xindex> } diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 6ee70fd..5f035e3 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -320,7 +320,7 @@ func.func @test_vector.transfer_write(%m: memref<1xi32>, %2: vector<1x32xi32>) func.func @test_vector.transfer_read(%arg0: vector<4x3xf32>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<4x3xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> // expected-error@+1 {{ requires memref or ranked tensor type}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : vector<4x3xf32>, vector<1x1x2x3xf32> } @@ -330,7 +330,7 @@ func.func @test_vector.transfer_read(%arg0: vector<4x3xf32>) { func.func @test_vector.transfer_read(%arg0: memref<4x3xf32>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<4x3xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> // expected-error@+1 {{ requires vector type}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : memref<4x3xf32>, f32 } @@ -414,7 +414,7 @@ func.func @test_vector.transfer_read(%arg0: memref<?x?x?xf32>) { %c3 = arith.constant 3 : index %cst = arith.constant 3.0 : f32 // expected-note@+1 {{prior use here}} - %mask = vector.splat %c1 : vector<3x8x7xi1> + %mask = vector.broadcast %c1 : i1 to vector<3x8x7xi1> // expected-error@+1 {{expects different type than prior uses: 'vector<3x7xi1>' vs 'vector<3x8x7xi1>'}} %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst, %mask {permutation_map = affine_map<(d0, d1, d2)->(d0, 0, d2)>} : memref<?x?x?xf32>, vector<3x8x7xf32> } @@ -424,7 +424,7 @@ 
func.func @test_vector.transfer_read(%arg0: memref<?x?x?xf32>) { func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<4x3xf32>>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<4x3xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> // expected-error@+1 {{requires source vector element and vector result ranks to match}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<4x3xf32>>, vector<3xf32> } @@ -434,7 +434,7 @@ func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<4x3xf32>>) { func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<6xf32>>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<6xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<6xf32> // expected-error@+1 {{requires the bitwidth of the minor 1-D vector to be an integral multiple of the bitwidth of the minor 1-D vector of the source}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : memref<?x?xvector<6xf32>>, vector<3xf32> } @@ -444,7 +444,7 @@ func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<6xf32>>) { func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<2x3xf32>>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<2x3xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<2x3xf32> // expected-error@+1 {{ expects the in_bounds attr of same rank as permutation_map results: affine_map<(d0, d1) -> (d0, d1)>}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {in_bounds = [true], permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<2x3xf32>>, vector<1x1x2x3xf32> } @@ -454,8 +454,8 @@ func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<2x3xf32>>) { func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<2x3xf32>>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<2x3xf32> - %mask = vector.splat %c1 : vector<2x3xi1> + %vf0 = vector.broadcast %f0 : f32 to vector<2x3xf32> + %mask = vector.broadcast %c1 : f32 to vector<2x3xi1> // expected-error@+1 {{does not support masks with vector element type}} %0 = vector.transfer_read %arg0[%c3, %c3], %vf0, %mask {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<2x3xf32>>, vector<1x1x2x3xf32> } @@ -492,7 +492,7 @@ func.func @test_vector.transfer_write(%arg0: memref<?x?xf32>) { func.func @test_vector.transfer_write(%arg0: memref<vector<4x3xf32>>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<4x3xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> // expected-error@+1 {{ requires vector type}} vector.transfer_write %arg0, %arg0[%c3, %c3] : memref<vector<4x3xf32>>, vector<4x3xf32> } @@ -502,7 +502,7 @@ func.func @test_vector.transfer_write(%arg0: memref<vector<4x3xf32>>) { func.func @test_vector.transfer_write(%arg0: vector<4x3xf32>) { %c3 = arith.constant 3 : index %f0 = arith.constant 0.0 : f32 - %vf0 = vector.splat %f0 : vector<4x3xf32> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> // expected-error@+1 {{ requires memref or ranked tensor type}} vector.transfer_write %arg0, %arg0[%c3, %c3] : vector<4x3xf32>, f32 } @@ -1981,29 +1981,6 @@ func.func @invalid_step_2d() { // ----- //===----------------------------------------------------------------------===// -// vector.splat -//===----------------------------------------------------------------------===// - -// 
----- - -func.func @vector_splat_invalid_result(%v : f32) { - // expected-error@+1 {{invalid kind of type specified: expected builtin.vector, but found 'memref<8xf32>'}} - vector.splat %v : memref<8xf32> - return -} - -// ----- - -// expected-note @+1 {{prior use here}} -func.func @vector_splat_type_mismatch(%a: f32) { - // expected-error @+1 {{expects different type than prior uses: 'i32' vs 'f32'}} - %0 = vector.splat %a : vector<1xi32> - return -} - -// ----- - -//===----------------------------------------------------------------------===// // vector.load //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index fe697c8..ee5cfbcd 100644 --- a/mlir/test/Dialect/Vector/linearize.mlir +++ b/mlir/test/Dialect/Vector/linearize.mlir @@ -428,33 +428,6 @@ func.func @test_linearize_across_for(%arg0 : vector<4xi8>) -> vector<4xi8> { // ----- -// CHECK-LABEL: linearize_vector_splat -// CHECK-SAME: (%[[ARG:.*]]: i32) -> vector<4x2xi32> -func.func @linearize_vector_splat(%arg0: i32) -> vector<4x2xi32> { - - // CHECK: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8xi32> - // CHECK: %[[CAST:.*]] = vector.shape_cast %[[SPLAT]] : vector<8xi32> to vector<4x2xi32> - // CHECK: return %[[CAST]] : vector<4x2xi32> - %0 = vector.splat %arg0 : vector<4x2xi32> - return %0 : vector<4x2xi32> -} - -// ----- - -// CHECK-LABEL: linearize_scalable_vector_splat -// CHECK-SAME: (%[[ARG:.*]]: i32) -> vector<4x[2]xi32> -func.func @linearize_scalable_vector_splat(%arg0: i32) -> vector<4x[2]xi32> { - - // CHECK: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<[8]xi32> - // CHECK: %[[CAST:.*]] = vector.shape_cast %[[SPLAT]] : vector<[8]xi32> to vector<4x[2]xi32> - // CHECK: return %[[CAST]] : vector<4x[2]xi32> - %0 = vector.splat %arg0 : vector<4x[2]xi32> - return %0 : vector<4x[2]xi32> - -} - -// ----- - // CHECK-LABEL: linearize_create_mask // CHECK-SAME: (%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) -> vector<1x16xi1> func.func @linearize_create_mask(%arg0 : index, %arg1 : index) -> vector<1x16xi1> { diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 550e52a..da9a1a8 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -45,11 +45,11 @@ func.func @vector_transfer_ops(%arg0: memref<?x?xf32>, %i0 = arith.constant 0 : index %i1 = arith.constant 1 : i1 - %vf0 = vector.splat %f0 : vector<4x3xf32> - %v0 = vector.splat %c0 : vector<4x3xi32> - %vi0 = vector.splat %i0 : vector<4x3xindex> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> + %v0 = vector.broadcast %c0 : i32 to vector<4x3xi32> + %vi0 = vector.broadcast %i0 : index to vector<4x3xindex> %m = arith.constant dense<[0, 0, 1, 0, 1]> : vector<5xi1> - %m2 = vector.splat %i1 : vector<4x5xi1> + %m2 = vector.broadcast %i1 : i1 to vector<4x5xi1> // // CHECK: vector.transfer_read %0 = vector.transfer_read %arg0[%c3, %c3], %f0 {permutation_map = affine_map<(d0, d1)->(d0)>} : memref<?x?xf32>, vector<128xf32> @@ -106,9 +106,9 @@ func.func @vector_transfer_ops_tensor(%arg0: tensor<?x?xf32>, %c0 = arith.constant 0 : i32 %i0 = arith.constant 0 : index - %vf0 = vector.splat %f0 : vector<4x3xf32> - %v0 = vector.splat %c0 : vector<4x3xi32> - %vi0 = vector.splat %i0 : vector<4x3xindex> + %vf0 = vector.broadcast %f0 : f32 to vector<4x3xf32> + %v0 = vector.broadcast %c0 : i32 to vector<4x3xi32> + %vi0 = vector.broadcast %i0 : index to vector<4x3xindex> // // CHECK: vector.transfer_read @@ 
-922,28 +922,6 @@ func.func @vector_scan(%0: vector<4x8x16x32xf32>) -> vector<4x8x16x32xf32> { return %2#0 : vector<4x8x16x32xf32> } -// CHECK-LABEL: func @test_splat_op -// CHECK-SAME: %[[s:.*]]: f32, %[[s2:.*]]: !llvm.ptr<1> -func.func @test_splat_op(%s : f32, %s2 : !llvm.ptr<1>) { - // CHECK: vector.splat %[[s]] : vector<8xf32> - %v = vector.splat %s : vector<8xf32> - - // CHECK: vector.splat %[[s]] : vector<4xf32> - %u = "vector.splat"(%s) : (f32) -> vector<4xf32> - - // CHECK: vector.splat %[[s2]] : vector<16x!llvm.ptr<1>> - %w = vector.splat %s2 : vector<16x!llvm.ptr<1>> - return -} - -// CHECK-LABEL: func @vector_splat_0d( -func.func @vector_splat_0d(%a: f32) -> vector<f32> { - // CHECK: vector.splat %{{.*}} : vector<f32> - %0 = vector.splat %a : vector<f32> - return %0 : vector<f32> -} - - // CHECK-LABEL: func @vector_mask func.func @vector_mask(%a: vector<8xi32>, %m0: vector<8xi1>) -> i32 { // CHECK-NEXT: %{{.*}} = vector.mask %{{.*}} { vector.reduction <add>, %{{.*}} : vector<8xi32> into i32 } : vector<8xi1> -> i32 diff --git a/mlir/test/Dialect/Vector/vector-emulate-masked-load-store.mlir b/mlir/test/Dialect/Vector/vector-emulate-masked-load-store.mlir index e74eb08..6e5d68c 100644 --- a/mlir/test/Dialect/Vector/vector-emulate-masked-load-store.mlir +++ b/mlir/test/Dialect/Vector/vector-emulate-masked-load-store.mlir @@ -49,7 +49,7 @@ func.func @vector_maskedload(%arg0 : memref<4x5xf32>) -> vector<4xf32> { %idx_4 = arith.constant 4 : index %mask = vector.create_mask %idx_1 : vector<4xi1> %s = arith.constant 0.0 : f32 - %pass_thru = vector.splat %s : vector<4xf32> + %pass_thru = vector.broadcast %s : f32 to vector<4xf32> %0 = vector.maskedload %arg0[%idx_0, %idx_4], %mask, %pass_thru : memref<4x5xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32> return %0: vector<4xf32> } @@ -65,7 +65,7 @@ func.func @vector_maskedload_with_alignment(%arg0 : memref<4x5xf32>) -> vector<4 %idx_4 = arith.constant 4 : index %mask = vector.create_mask %idx_1 : vector<4xi1> %s = arith.constant 0.0 : f32 - %pass_thru = vector.splat %s : vector<4xf32> + %pass_thru = vector.broadcast %s : f32 to vector<4xf32> %0 = vector.maskedload %arg0[%idx_0, %idx_4], %mask, %pass_thru {alignment = 8}: memref<4x5xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32> return %0: vector<4xf32> } diff --git a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir new file mode 100644 index 0000000..d289d73 --- /dev/null +++ b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir @@ -0,0 +1,63 @@ +// RUN: mlir-opt -test-xegpu-move-func-to-warp-op -split-input-file --allow-unregistered-dialect %s | FileCheck %s + +gpu.module @test { +gpu.func @empty() { + gpu.return +} +} + +// CHECK-LABEL: gpu.func @empty() { +// CHECK-NEXT: gpu.return +// CHECK-NEXT: } + +// ----- +gpu.module @test { +gpu.func @gemm(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) { + %c0 = arith.constant 0 : index + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> + %1 = xegpu.create_nd_tdesc %arg1 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> + %2 = xegpu.load_nd %0[%c0, %c0] : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> + %3 = xegpu.load_nd %1[%c0, %c0] : !xegpu.tensor_desc<16x16xf16> -> vector<16x16xf16> + %4 = xegpu.dpas %2, %3 : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> + %5 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> + xegpu.store_nd %4, %5[%c0, %c0] : vector<8x16xf32>, 
!xegpu.tensor_desc<8x16xf32> + gpu.return +} +} + +// CHECK-LABEL: gpu.func @gemm( +// CHECK: %[[ARG0:[a-zA-Z0-9]+]]: memref<8x16xf16>, %[[ARG1:[a-zA-Z0-9]+]]: memref<16x16xf16>, +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: memref<8x16xf32>) { +// CHECK: %[[LANEID:.*]] = gpu.lane_id +// CHECK-NEXT: gpu.warp_execute_on_lane_0(%[[LANEID]])[16] +// CHECK-SAME: args(%[[ARG0]], %[[ARG1]], %[[ARG2]] : memref<8x16xf16>, memref<16x16xf16>, memref<8x16xf32>) { +// CHECK: ^bb0(%[[ARG3:[a-zA-Z0-9]+]]: memref<8x16xf16>, %[[ARG4:[a-zA-Z0-9]+]]: memref<16x16xf16>, +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: memref<8x16xf32>): +// CHECK-NEXT: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> +// CHECK-NEXT: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> +// CHECK-NEXT: %[[T3:.*]] = xegpu.load_nd %[[T1]][{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> +// CHECK-NEXT: %[[T4:.*]] = xegpu.load_nd %[[T2]][{{.*}}] : !xegpu.tensor_desc<16x16xf16> -> vector<16x16xf16> +// CHECK-NEXT: %[[T5:.*]] = xegpu.dpas %[[T3]], %[[T4]] : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> +// CHECK-NEXT: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG5]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> +// CHECK-NEXT: xegpu.store_nd %[[T5]], %[[T6]][%{{.*}}] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32> +// CHECK-NEXT: } +// CHECK-NEXT: gpu.return + +// ----- +gpu.module @test { +gpu.func @already_in_warp_op() { + %laneid = gpu.lane_id + gpu.warp_execute_on_lane_0(%laneid)[16] { + "some_op"() : () -> () + gpu.yield + } + gpu.return +} +} + +// CHECK-LABEL: gpu.func @already_in_warp_op() { +// CHECK: %[[LANEID:.*]] = gpu.lane_id +// CHECK: gpu.warp_execute_on_lane_0(%[[LANEID]])[16] { +// CHECK: "some_op"() : () -> () +// CHECK: } +// CHECK: gpu.return diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir index 40b66d1..f233dff 100644 --- a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir +++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir @@ -530,7 +530,7 @@ gpu.module @xevm_module{ // CHECK-NEXT: } // CHECK-NEXT: %[[T1:.*]] = vector.transpose %[[W]]#1, [1, 0] : vector<1x2xf32> to vector<2x1xf32> gpu.module @xevm_module{ - gpu.func @vector_transpose(%arg0: memref<2x16xf32>, %laneid: index) { + gpu.func @vector_transpose(%laneid: index) { %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x1xf32>) { %cst = "some_op"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} @@ -556,7 +556,7 @@ gpu.module @xevm_module{ // CHECK: } // CHECK: vector.bitcast %[[W]]#1 : vector<4x2xi8> to vector<4x1xi16> gpu.module @xevm_module{ - gpu.func @vector_bitcast(%arg0: memref<4x16xi16>, %laneid: index) { + gpu.func @vector_bitcast(%laneid: index) { %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<4x1xi16>) { %cst = "some_op"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} @@ -573,3 +573,82 @@ gpu.module @xevm_module{ gpu.return } } + +// ----- +// CHECK-LABEL: gpu.func @vector_shapecast_rank_increasing +// CHECK: %{{.*}}:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<1x1xf32>, vector<1xf32>) { +// CHECK: gpu.yield %{{.*}} : vector<1x16xf32>, vector<16xf32> +// CHECK: } +// CHECK: %{{.*}} = vector.shape_cast %{{.*}}#1 : vector<1xf32> to vector<1x1xf32> +gpu.module @xevm_module { + gpu.func @vector_shapecast_rank_increasing(%laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] 
-> (vector<1x1xf32>) { + %cst = "some_op"() + {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} + : () -> (vector<16xf32>) + %cast = vector.shape_cast %cst + { + layout_operand_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>, + layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]> + } + : vector<16xf32> to vector<1x16xf32> + gpu.yield %cast : vector<1x16xf32> + } + "some_user_op"(%r) : (vector<1x1xf32>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @vector_shapecast_rank_reducing( +// CHECK: %{{.*}}:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<1xf32>, vector<1x1xf32>) { +// CHECK: gpu.yield %{{.*}} : vector<16xf32>, vector<1x16xf32> +// CHECK: } +// CHECK: %{{.*}} = vector.shape_cast %{{.*}}#1 : vector<1x1xf32> to vector<1xf32> +gpu.module @xevm_module { + gpu.func @vector_shapecast_rank_reducing(%laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1xf32>) { + %cst = "some_op"() + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : () -> (vector<1x16xf32>) + %cast = vector.shape_cast %cst + { + layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, + layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]> + } + : vector<1x16xf32> to vector<16xf32> + gpu.yield %cast : vector<16xf32> + } + "some_user_op"(%r) : (vector<1xf32>) -> () + gpu.return + } +} + +// ----- +// NOTE: Layouts are still valid, but distribution still requires a slice layout for the operand. +// +// CHECK-LABEL: gpu.func @vector_shapecast_unsupported +// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<1x1xf32>) { +// CHECK: %[[T1:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<1x16xf32> +// CHECK: gpu.yield %[[T1]] : vector<1x16xf32> +// CHECK: } +// CHECK: "some_user_op"(%[[W]]) : (vector<1x1xf32>) -> () +// CHECK: gpu.return +gpu.module @xevm_module { + gpu.func @vector_shapecast_unsupported(%laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1x1xf32>) { + %cst = "some_op"() + {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]> } + : () -> (vector<16xf32>) + %cast = vector.shape_cast %cst + { + layout_operand_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]> + } + : vector<16xf32> to vector<1x16xf32> + gpu.yield %cast : vector<1x16xf32> + } + "some_user_op"(%r) : (vector<1x1xf32>) -> () + gpu.return + } +} diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 12a911c..0c5fec8c 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -107,7 +107,7 @@ func.func @return_not_in_function() { // ----- func.func @invalid_splat(%v : f32) { // expected-note {{prior use here}} - vector.splat %v : vector<8xf64> + vector.broadcast %v : f64 to vector<8xf64> // expected-error@-1 {{expects different type than prior uses}} return } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir index 6ec1031..6c2183f 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir @@ -21,13 +21,6 @@ func.func @print_vector_0d(%a: vector<f32>) { return } -func.func @splat_0d(%a: f32) { - %1 = vector.splat %a : vector<f32> - // CHECK: ( 42 ) 
- vector.print %1: vector<f32> - return -} - func.func @broadcast_0d(%a: f32) { %1 = vector.broadcast %a : f32 to vector<f32> // CHECK: ( 42 ) @@ -147,7 +140,6 @@ func.func @entry() { // Warning: these must be called in their textual order of definition in the // file to not mess up FileCheck. - call @splat_0d(%4) : (f32) -> () call @broadcast_0d(%4) : (f32) -> () call @bitcast_0d() : () -> () call @constant_mask_0d() : () -> () diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp index 6ba7a00..76d4611 100644 --- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp +++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp @@ -247,6 +247,36 @@ struct TestXeGPUSGDistribute } }; +struct TestXeGPUMoveFuncBodyToWarpOp + : public PassWrapper<TestXeGPUMoveFuncBodyToWarpOp, + OperationPass<gpu::GPUModuleOp>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestXeGPUMoveFuncBodyToWarpOp) + + StringRef getArgument() const final { + return "test-xegpu-move-func-to-warp-op"; + } + + StringRef getDescription() const final { + return "Test the implementation of XeGPU move gpu function body to " + "WarpExecuteOnLane0 op."; + } + + void getDependentDialects(::mlir::DialectRegistry ®istry) const override { + registry.insert<xegpu::XeGPUDialect>(); + registry.insert<gpu::GPUDialect>(); + } + + TestXeGPUMoveFuncBodyToWarpOp() = default; + TestXeGPUMoveFuncBodyToWarpOp(const TestXeGPUMoveFuncBodyToWarpOp &pass) = + default; + + void runOnOperation() override { + RewritePatternSet patterns(&getContext()); + xegpu::populateXeGPUMoveFuncBodyToWarpOpPatterns(patterns); + (void)applyPatternsGreedily(getOperation(), std::move(patterns)); + } +}; + struct TestXeGPULayoutInterface : public PassWrapper<TestXeGPULayoutInterface, OperationPass<gpu::GPUModuleOp>> { @@ -312,6 +342,7 @@ void registerTestXeGPULowerings() { PassRegistration<TestXeGPUUnrollingPatterns>(); PassRegistration<TestXeGPULayoutInterface>(); PassRegistration<TestXeGPUSGDistribute>(); + PassRegistration<TestXeGPUMoveFuncBodyToWarpOp>(); } } // namespace test } // namespace mlir diff --git a/mlir/test/mlir-runner/utils.mlir b/mlir/test/mlir-runner/utils.mlir index 0c25078..d3fc23b 100644 --- a/mlir/test/mlir-runner/utils.mlir +++ b/mlir/test/mlir-runner/utils.mlir @@ -56,7 +56,7 @@ func.func private @printMemrefF32(memref<*xf32>) attributes { llvm.emit_c_interf func.func @vector_splat_2d() { %c0 = arith.constant 0 : index %f10 = arith.constant 10.0 : f32 - %vf10 = vector.splat %f10: !vector_type_C + %vf10 = vector.broadcast %f10: f32 to !vector_type_C %C = memref.alloc() : !matrix_type_CC memref.store %vf10, %C[%c0, %c0]: !matrix_type_CC diff --git a/mlir/utils/tree-sitter-mlir/queries/highlights.scm b/mlir/utils/tree-sitter-mlir/queries/highlights.scm index 59e280b..ca52bcc 100644 --- a/mlir/utils/tree-sitter-mlir/queries/highlights.scm +++ b/mlir/utils/tree-sitter-mlir/queries/highlights.scm @@ -181,7 +181,6 @@ "vector.insert_strided_slice" "vector.matrix_multiply" "vector.print" - "vector.splat" "vector.transfer_read" "vector.transfer_write" "vector.yield" diff --git a/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h b/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h index 77d3339..6dbc0c0 100644 --- a/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h +++ b/orc-rt/include/orc-rt/SimpleNativeMemoryMap.h @@ -54,18 +54,10 @@ public: struct FinalizeRequest { struct Segment { - enum class ContentType : uint8_t { Uninitialized, ZeroFill, Regular }; - - Segment() = default; - Segment(void *Address, 
size_t Size, AllocGroup G, ContentType C) - : Address(Address), Size(Size), G(G), C(C) {} - - void *Address = nullptr; + AllocGroup AG; + char *Address = nullptr; size_t Size = 0; - AllocGroup G; - ContentType C = ContentType::Uninitialized; - char *data() { return reinterpret_cast<char *>(Address); } - size_t size() const { return Size; } + span<const char> Content; }; std::vector<Segment> Segments; diff --git a/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp b/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp index 603ef8b..10cdcf5 100644 --- a/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp +++ b/orc-rt/lib/executor/SimpleNativeMemoryMap.cpp @@ -31,36 +31,22 @@ struct SPSSimpleNativeMemoryMapSegment; template <> class SPSSerializationTraits<SPSSimpleNativeMemoryMapSegment, SimpleNativeMemoryMap::FinalizeRequest::Segment> { - using SPSType = SPSTuple<SPSExecutorAddr, uint64_t, SPSAllocGroup, uint8_t>; + using SPSType = + SPSTuple<SPSAllocGroup, SPSExecutorAddr, uint64_t, SPSSequence<char>>; public: static bool deserialize(SPSInputBuffer &IB, SimpleNativeMemoryMap::FinalizeRequest::Segment &S) { - using ContentType = - SimpleNativeMemoryMap::FinalizeRequest::Segment::ContentType; - + AllocGroup AG; ExecutorAddr Address; uint64_t Size; - AllocGroup G; - uint8_t C; - if (!SPSType::AsArgList::deserialize(IB, Address, Size, G, C)) + span<const char> Content; + if (!SPSType::AsArgList::deserialize(IB, AG, Address, Size, Content)) return false; - if (Size >= std::numeric_limits<size_t>::max()) + if (Size > std::numeric_limits<size_t>::max()) return false; - S.Address = Address.toPtr<void *>(); - S.Size = Size; - S.G = G; - S.C = static_cast<ContentType>(C); - switch (S.C) { - case ContentType::Uninitialized: - return true; - case ContentType::ZeroFill: - memset(reinterpret_cast<char *>(S.Address), 0, S.Size); - return true; - case ContentType::Regular: - // Read content directly into target address. - return IB.read(reinterpret_cast<char *>(S.Address), S.Size); - } + S = {AG, Address.toPtr<char *>(), static_cast<size_t>(Size), Content}; + return true; } }; @@ -138,10 +124,31 @@ void SimpleNativeMemoryMap::finalize(OnFinalizeCompleteFn &&OnComplete, // TODO: Record finalize segments for release. // std::vector<std::pair<void*, size_t>> FinalizeSegments; + // Check segment validity before proceeding. for (auto &S : FR.Segments) { - if (auto Err = hostOSMemoryProtect(S.Address, S.Size, S.G.getMemProt())) + + if (S.Content.size() > S.Size) { + return OnComplete(make_error<StringError>( + (std::ostringstream() + << "For segment [" << (void *)S.Address << ".." + << (void *)(S.Address + S.Size) << "), " + << " content size (" << std::hex << S.Content.size() + << ") exceeds segment size (" << S.Size << ")") + .str())); + } + + // Copy any requested content. + if (!S.Content.empty()) + memcpy(S.Address, S.Content.data(), S.Content.size()); + + // Zero-fill the rest of the section. 
+ if (size_t ZeroFillSize = S.Size - S.Content.size()) + memset(S.Address + S.Content.size(), 0, ZeroFillSize); + + if (auto Err = hostOSMemoryProtect(S.Address, S.Size, S.AG.getMemProt())) return OnComplete(std::move(Err)); - switch (S.G.getMemLifetime()) { + + switch (S.AG.getMemLifetime()) { case MemLifetime::Standard: if (!Base || S.Address < Base) Base = S.Address; diff --git a/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp b/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp index ebd9bc3..b7ef7f0 100644 --- a/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp +++ b/orc-rt/unittests/SimpleNativeMemoryMapTest.cpp @@ -31,49 +31,32 @@ struct SPSSimpleNativeMemoryMapSegment; struct TestSNMMSegment : public SimpleNativeMemoryMap::FinalizeRequest::Segment { - enum TestSNMMSegmentContent { Uninitialized, ZeroFill }; - - TestSNMMSegment(void *Address, AllocGroup G, std::string Content) - : SimpleNativeMemoryMap::FinalizeRequest::Segment( - Address, Content.size(), G, ContentType::Regular), - Content(std::move(Content)) {} - - TestSNMMSegment(void *Address, size_t Size, AllocGroup G, - TestSNMMSegmentContent Content) + TestSNMMSegment(AllocGroup AG, char *Address, size_t Size, + std::vector<char> C = {}) : SimpleNativeMemoryMap::FinalizeRequest::Segment( - Address, Size, G, - Content == ZeroFill ? ContentType::ZeroFill - : ContentType::Uninitialized) {} + {AG, Address, Size, {}}), + OwnedContent(std::move(C)) { + this->Content = {OwnedContent.data(), OwnedContent.size()}; + } - std::string Content; + std::vector<char> OwnedContent; }; template <> class SPSSerializationTraits<SPSSimpleNativeMemoryMapSegment, TestSNMMSegment> { - using SPSType = SPSTuple<SPSExecutorAddr, uint64_t, SPSAllocGroup, uint8_t>; + using SPSType = + SPSTuple<SPSAllocGroup, SPSExecutorAddr, uint64_t, SPSSequence<char>>; public: static size_t size(const TestSNMMSegment &S) { - using ContentType = - SimpleNativeMemoryMap::FinalizeRequest::Segment::ContentType; - assert((S.C != ContentType::Regular || S.Size == S.Content.size())); - return SPSType::AsArgList::size(ExecutorAddr::fromPtr(S.Address), - static_cast<uint64_t>(S.Size), S.G, - static_cast<uint8_t>(S.C)) + - (S.C == ContentType::Regular ? S.Size : 0); + return SPSType::AsArgList::size(S.AG, ExecutorAddr::fromPtr(S.Address), + static_cast<uint64_t>(S.Size), S.Content); } static bool serialize(SPSOutputBuffer &OB, const TestSNMMSegment &S) { - using ContentType = - SimpleNativeMemoryMap::FinalizeRequest::Segment::ContentType; - assert((S.C != ContentType::Regular || S.Size == S.Content.size())); - if (!SPSType::AsArgList::serialize(OB, ExecutorAddr::fromPtr(S.Address), - static_cast<uint64_t>(S.Size), S.G, - static_cast<uint8_t>(S.C))) - return false; - if (S.C == ContentType::Regular) - return OB.write(S.Content.data(), S.Content.size()); - return true; + return SPSType::AsArgList::serialize( + OB, S.AG, ExecutorAddr::fromPtr(S.Address), + static_cast<uint64_t>(S.Size), S.Content); } }; @@ -207,30 +190,61 @@ TEST(SimpleNativeMemoryMap, FullPipelineForOneRWSegment) { std::future<Expected<Expected<void *>>> FinalizeKey; TestSNMMFinalizeRequest FR; - void *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. - reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Addr) + 64 * 1024); - uint64_t SentinelValue = 0; - - FR.Segments.push_back({FinalizeBase, 64 * 1024, - MemProt::Read | MemProt::Write, - TestSNMMSegment::ZeroFill}); + char *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. 
+ reinterpret_cast<char *>(Addr) + 64 * 1024; + uint64_t SentinelValue1 = 0; // Read from pre-filled content + uint64_t SentinelValue2 = 0; // Written in finalize, read back during dealloc. + uint64_t SentinelValue3 = 42; // Read from zero-filled region. + + // Build initial content vector. + std::vector<char> Content; + Content.resize(sizeof(uint64_t) * 2); + memcpy(Content.data(), &SentinelValue3, sizeof(uint64_t)); + memcpy(Content.data() + sizeof(uint64_t), &SentinelValue1, sizeof(uint64_t)); + + FR.Segments.push_back({MemProt::Read | MemProt::Write, FinalizeBase, + 64 * 1024, std::move(Content)}); + + // Read initial content into Sentinel 1. + FR.AAPs.push_back({ + *MakeAllocAction<SPSExecutorAddr, SPSExecutorAddr>::from( + read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue1), + ExecutorAddr::fromPtr(FinalizeBase)), + {} // No dealloc action. + }); + + // Write value in finalize action, then read back into Sentinel 2. FR.AAPs.push_back( {*MakeAllocAction<SPSExecutorAddr, uint64_t>::from( - write_value_sps_allocaction, ExecutorAddr::fromPtr(FinalizeBase), + write_value_sps_allocaction, + ExecutorAddr::fromPtr(FinalizeBase) + sizeof(uint64_t), uint64_t(42)), *MakeAllocAction<SPSExecutorAddr, SPSExecutorAddr>::from( - read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue), - ExecutorAddr::fromPtr(FinalizeBase))}); + read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue2), + ExecutorAddr::fromPtr(FinalizeBase) + sizeof(uint64_t))}); + + // Read first 64 bits of the zero-fill region. + FR.AAPs.push_back({ + *MakeAllocAction<SPSExecutorAddr, SPSExecutorAddr>::from( + read_value_sps_allocaction, ExecutorAddr::fromPtr(&SentinelValue3), + ExecutorAddr::fromPtr(FinalizeBase) + sizeof(uint64_t) * 2), + {} // No dealloc action. + }); + snmm_finalize(waitFor(FinalizeKey), SNMM.get(), std::move(FR)); void *FinalizeKeyAddr = cantFail(cantFail(FinalizeKey.get())); - EXPECT_EQ(SentinelValue, 0U); + EXPECT_EQ(SentinelValue1, 42U); + EXPECT_EQ(SentinelValue2, 0U); + EXPECT_EQ(SentinelValue3, 0U); std::future<Expected<Error>> DeallocResult; snmm_deallocate(waitFor(DeallocResult), SNMM.get(), FinalizeKeyAddr); cantFail(cantFail(DeallocResult.get())); - EXPECT_EQ(SentinelValue, 42); + EXPECT_EQ(SentinelValue1, 42U); + EXPECT_EQ(SentinelValue2, 42U); + EXPECT_EQ(SentinelValue3, 0U); std::future<Expected<Error>> ReleaseResult; snmm_release(waitFor(ReleaseResult), SNMM.get(), Addr); @@ -248,13 +262,13 @@ TEST(SimpleNativeMemoryMap, ReserveFinalizeShutdown) { std::future<Expected<Expected<void *>>> FinalizeKey; TestSNMMFinalizeRequest FR; - void *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. - reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Addr) + 64 * 1024); + char *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. + reinterpret_cast<char *>(Addr) + 64 * 1024; uint64_t SentinelValue = 0; - FR.Segments.push_back({FinalizeBase, 64 * 1024, - MemProt::Read | MemProt::Write, - TestSNMMSegment::ZeroFill}); + FR.Segments.push_back( + {MemProt::Read | MemProt::Write, FinalizeBase, 64 * 1024}); + FR.AAPs.push_back( {*MakeAllocAction<SPSExecutorAddr, uint64_t>::from( write_value_sps_allocaction, ExecutorAddr::fromPtr(FinalizeBase), @@ -285,13 +299,13 @@ TEST(SimpleNativeMemoryMap, ReserveFinalizeDetachShutdown) { std::future<Expected<Expected<void *>>> FinalizeKey; TestSNMMFinalizeRequest FR; - void *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. 
- reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Addr) + 64 * 1024); + char *FinalizeBase = // Finalize addr at non-zero (64kb) offset from base. + reinterpret_cast<char *>(Addr) + 64 * 1024; uint64_t SentinelValue = 0; - FR.Segments.push_back({FinalizeBase, 64 * 1024, - MemProt::Read | MemProt::Write, - TestSNMMSegment::ZeroFill}); + FR.Segments.push_back( + {MemProt::Read | MemProt::Write, FinalizeBase, 64 * 1024}); + FR.AAPs.push_back( {*MakeAllocAction<SPSExecutorAddr, uint64_t>::from( write_value_sps_allocaction, ExecutorAddr::fromPtr(FinalizeBase), diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 6c7679b..74ba077 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1657,6 +1657,7 @@ cc_library( ":GPUDialect", ":IR", ":InferTypeOpInterface", + ":InliningUtils", ":MemRefUtils", ":ROCDLDialect", ":SideEffectInterfaces", @@ -3693,6 +3694,7 @@ cc_library( ":XeGPUAttrInterfaceIncGen", ":XeGPUEnumsIncGen", ":XeGPUIncGen", + ":XeGPUuArch", ":XeVMDialect", "//llvm:Support", ], @@ -3768,6 +3770,16 @@ cc_library( ], ) +cc_library( + name = "XeGPUuArch", + hdrs = glob(["include/mlir/Dialect/XeGPU/uArch/*.h"]), + includes = ["include"], + deps = [ + ":IR", + "//llvm:Support", + ], +) + td_library( name = "FuncTdFiles", srcs = [ |