diff options
| author | Vitaly Buka <vitalybuka@google.com> | 2026-03-05 17:18:24 -0800 |
|---|---|---|
| committer | Vitaly Buka <vitalybuka@google.com> | 2026-03-05 17:18:24 -0800 |
| commit | 2d67bf9405268cb08d70b7724e9c01d79a7dc839 (patch) | |
| tree | 87709c1d59b9c8440165b384ca460bcf0c78dda1 | |
| parent | 68708c61f8cb9028614772c22141a6312fe1aee7 (diff) | |
| parent | cf8004bdee91cdbf6473a2c2d4dd992e5d5f8a92 (diff) | |
| download | llvm-users/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test.tar.gz llvm-users/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test.tar.bz2 llvm-users/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test.zip | |
no explicit limitsusers/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test
Created using spr 1.3.7
226 files changed, 8276 insertions, 2020 deletions
diff --git a/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt b/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt index 2f31d168e65c..9179e5dea4ea 100644 --- a/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt @@ -8,7 +8,6 @@ add_clang_library(clangTidyHICPPModule STATIC HICPPTidyModule.cpp IgnoredRemoveResultCheck.cpp MultiwayPathsCoveredCheck.cpp - NoAssemblerCheck.cpp SignedBitwiseCheck.cpp LINK_LIBS @@ -19,6 +18,7 @@ add_clang_library(clangTidyHICPPModule STATIC clangTidyMiscModule clangTidyModernizeModule clangTidyPerformanceModule + clangTidyPortabilityModule clangTidyReadabilityModule clangTidyUtils diff --git a/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp b/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp index 2e0e64fbcd2a..a4601d9cdde9 100644 --- a/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp @@ -30,6 +30,7 @@ #include "../modernize/UseOverrideCheck.h" #include "../performance/MoveConstArgCheck.h" #include "../performance/NoexceptMoveConstructorCheck.h" +#include "../portability/NoAssemblerCheck.h" #include "../readability/BracesAroundStatementsCheck.h" #include "../readability/FunctionSizeCheck.h" #include "../readability/NamedParameterCheck.h" @@ -37,7 +38,6 @@ #include "ExceptionBaseclassCheck.h" #include "IgnoredRemoveResultCheck.h" #include "MultiwayPathsCoveredCheck.h" -#include "NoAssemblerCheck.h" #include "SignedBitwiseCheck.h" namespace clang::tidy { @@ -81,7 +81,8 @@ public: CheckFactories .registerCheck<cppcoreguidelines::ProBoundsArrayToPointerDecayCheck>( "hicpp-no-array-decay"); - CheckFactories.registerCheck<NoAssemblerCheck>("hicpp-no-assembler"); + CheckFactories.registerCheck<portability::NoAssemblerCheck>( + "hicpp-no-assembler"); CheckFactories.registerCheck<cppcoreguidelines::NoMallocCheck>( "hicpp-no-malloc"); CheckFactories diff --git 
a/clang-tools-extra/clang-tidy/portability/CMakeLists.txt b/clang-tools-extra/clang-tidy/portability/CMakeLists.txt index 73d74a550afc..170fedf52130 100644 --- a/clang-tools-extra/clang-tidy/portability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/portability/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangTidyPortabilityModule STATIC AvoidPragmaOnceCheck.cpp + NoAssemblerCheck.cpp PortabilityTidyModule.cpp RestrictSystemIncludesCheck.cpp SIMDIntrinsicsCheck.cpp diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.cpp index e7d97b2a26b2..d9a20b97b233 100644 --- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp +++ b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.cpp @@ -11,7 +11,7 @@ using namespace clang::ast_matchers; -namespace clang::tidy::hicpp { +namespace clang::tidy::portability { void NoAssemblerCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(asmStmt().bind("asm-stmt"), this); @@ -34,4 +34,4 @@ void NoAssemblerCheck::check(const MatchFinder::MatchResult &Result) { diag(ASMLocation, "do not use inline assembler in safety-critical code"); } -} // namespace clang::tidy::hicpp +} // namespace clang::tidy::portability diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.h index 15d646fd97af..2bc403e57a14 100644 --- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h +++ b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.h @@ -6,17 +6,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_NOASSEMBLERCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_NOASSEMBLERCHECK_H #include 
"../ClangTidyCheck.h" -namespace clang::tidy::hicpp { +namespace clang::tidy::portability { /// Find assembler statements. No fix is offered. /// /// For the user-facing documentation see: -/// https://clang.llvm.org/extra/clang-tidy/checks/hicpp/no-assembler.html +/// https://clang.llvm.org/extra/clang-tidy/checks/portability/no-assembler.html class NoAssemblerCheck : public ClangTidyCheck { public: NoAssemblerCheck(StringRef Name, ClangTidyContext *Context) @@ -25,6 +25,6 @@ public: void check(const ast_matchers::MatchFinder::MatchResult &Result) override; }; -} // namespace clang::tidy::hicpp +} // namespace clang::tidy::portability -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_NOASSEMBLERCHECK_H diff --git a/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp index fda997a2a3df..1f2340502f68 100644 --- a/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp @@ -9,6 +9,7 @@ #include "../ClangTidy.h" #include "../ClangTidyModule.h" #include "AvoidPragmaOnceCheck.h" +#include "NoAssemblerCheck.h" #include "RestrictSystemIncludesCheck.h" #include "SIMDIntrinsicsCheck.h" #include "StdAllocatorConstCheck.h" @@ -23,6 +24,7 @@ public: void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { CheckFactories.registerCheck<AvoidPragmaOnceCheck>( "portability-avoid-pragma-once"); + CheckFactories.registerCheck<NoAssemblerCheck>("portability-no-assembler"); CheckFactories.registerCheck<RestrictSystemIncludesCheck>( "portability-restrict-system-includes"); CheckFactories.registerCheck<SIMDIntrinsicsCheck>( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index b461f764eb0d..b0b4cd646c3b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ 
b/clang-tools-extra/docs/ReleaseNotes.rst @@ -157,6 +157,11 @@ New checks New check aliases ^^^^^^^^^^^^^^^^^ +- Renamed :doc:`hicpp-no-assembler <clang-tidy/checks/hicpp/no-assembler>` + to :doc:`portability-no-assembler + <clang-tidy/checks/portability/no-assembler>`. The `hicpp-no-assembler` + name is kept as an alias. + Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst b/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst index 55231fbd0a8d..7987e40ba9e8 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst @@ -1,10 +1,10 @@ .. title:: clang-tidy - hicpp-no-assembler +.. meta:: + :http-equiv=refresh: 0;URL=../portability/no-assembler.html hicpp-no-assembler ================== -Checks for assembler statements. Use of inline assembly should be avoided since -it restricts the portability of the code. - -This enforces `rule 7.5.1 <https://www.perforce.com/resources/qac/high-integrity-cpp-coding-rules>`_ -of the High Integrity C++ Coding Standard. +The `hicpp-no-assembler` check is an alias, please see +`portability-no-assembler <../portability/no-assembler.html>`_ for more +information. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 4beea34f0c5a..068431fb5c94 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -243,7 +243,6 @@ Clang-Tidy Checks :doc:`hicpp-exception-baseclass <hicpp/exception-baseclass>`, :doc:`hicpp-ignored-remove-result <hicpp/ignored-remove-result>`, :doc:`hicpp-multiway-paths-covered <hicpp/multiway-paths-covered>`, - :doc:`hicpp-no-assembler <hicpp/no-assembler>`, :doc:`hicpp-signed-bitwise <hicpp/signed-bitwise>`, :doc:`linuxkernel-must-check-errs <linuxkernel/must-check-errs>`, :doc:`llvm-header-guard <llvm/header-guard>`, @@ -371,6 +370,7 @@ Clang-Tidy Checks :doc:`performance-unnecessary-value-param <performance/unnecessary-value-param>`, "Yes" :doc:`performance-use-std-move <performance/use-std-move>`, "Yes" :doc:`portability-avoid-pragma-once <portability/avoid-pragma-once>`, + :doc:`portability-no-assembler <portability/no-assembler>`, :doc:`portability-restrict-system-includes <portability/restrict-system-includes>`, "Yes" :doc:`portability-simd-intrinsics <portability/simd-intrinsics>`, :doc:`portability-std-allocator-const <portability/std-allocator-const>`, @@ -608,6 +608,7 @@ Check aliases :doc:`hicpp-named-parameter <hicpp/named-parameter>`, :doc:`readability-named-parameter <readability/named-parameter>`, "Yes" :doc:`hicpp-new-delete-operators <hicpp/new-delete-operators>`, :doc:`misc-new-delete-overloads <misc/new-delete-overloads>`, :doc:`hicpp-no-array-decay <hicpp/no-array-decay>`, :doc:`cppcoreguidelines-pro-bounds-array-to-pointer-decay <cppcoreguidelines/pro-bounds-array-to-pointer-decay>`, + :doc:`hicpp-no-assembler <hicpp/no-assembler>`, :doc:`portability-no-assembler <portability/no-assembler>`, :doc:`hicpp-no-malloc <hicpp/no-malloc>`, :doc:`cppcoreguidelines-no-malloc <cppcoreguidelines/no-malloc>`, :doc:`hicpp-noexcept-move <hicpp/noexcept-move>`, 
:doc:`performance-noexcept-move-constructor <performance/noexcept-move-constructor>`, "Yes" :doc:`hicpp-special-member-functions <hicpp/special-member-functions>`, :doc:`cppcoreguidelines-special-member-functions <cppcoreguidelines/special-member-functions>`, diff --git a/clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst b/clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst new file mode 100644 index 000000000000..ddc24683d76d --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst @@ -0,0 +1,12 @@ +.. title:: clang-tidy - portability-no-assembler + +portability-no-assembler +======================== + +Checks for assembler statements. Use of inline assembly should be avoided +since it ties to a specific CPU architecture and syntax making code that +uses it non-portable across platforms. + +.. code-block:: c++ + + asm("mov al, 2"); // warning: do not use assembler statements diff --git a/clang-tools-extra/test/clang-tidy/checkers/hicpp/no-assembler.cpp b/clang-tools-extra/test/clang-tidy/checkers/portability/no-assembler.cpp index d08ea74f6cde..0e589b65df1e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/hicpp/no-assembler.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/portability/no-assembler.cpp @@ -1,12 +1,12 @@ -// RUN: %check_clang_tidy %s hicpp-no-assembler %t +// RUN: %check_clang_tidy %s portability-no-assembler %t __asm__(".symver foo, bar@v"); -// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: do not use inline assembler in safety-critical code [hicpp-no-assembler] +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: do not use inline assembler in safety-critical code [portability-no-assembler] static int s asm("spam"); -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use inline assembler in safety-critical code [hicpp-no-assembler] +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use inline assembler in safety-critical code [portability-no-assembler] void f() { __asm("mov al, 
2"); - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use inline assembler in safety-critical code [hicpp-no-assembler] + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use inline assembler in safety-critical code [portability-no-assembler] } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp new file mode 100644 index 000000000000..a58d6769ffc7 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp @@ -0,0 +1,22 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.IgnoredExceptionVariableNames: "^[ex]$"}}' \ +// RUN: -- -fexceptions + +struct myexcept { int val; }; +void doIt(); + +void test() { + try { + doIt(); + } catch (const myexcept &e) { doIt(); } // no warning, e allowed + try { + doIt(); + } catch (const myexcept &x) { doIt(); } // no warning, x allowed + try { + doIt(); + } catch (const myexcept &y) + // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: exception variable name 'y' is too short, expected at least 2 characters [readability-identifier-length] + { + doIt(); + } +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp new file mode 100644 index 000000000000..2d2049bbdc6d --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp @@ -0,0 +1,14 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.IgnoredLoopCounterNames: "^[ijk]$"}}' \ +// RUN: -- -fexceptions + +void doIt(); + +void test() { + for (int i 
= 0; i < 5; ++i) { doIt(); } // no warning, i allowed + for (int m = 0; m < 5; ++m) + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: loop variable name 'm' is too short, expected at least 2 characters [readability-identifier-length] + { + doIt(); + } +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp new file mode 100644 index 000000000000..65a45f8bb37b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp @@ -0,0 +1,11 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.IgnoredParameterNames: "^[ab]$"}}' \ +// RUN: -- -fexceptions + +void test(int a, int b, int c) +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: parameter name 'c' is too short, expected at least 3 characters [readability-identifier-length] +{ + (void)a; + (void)b; + (void)c; +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp new file mode 100644 index 000000000000..d108b115e29d --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp @@ -0,0 +1,16 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumExceptionNameLength: 4}}' \ +// RUN: -- -fexceptions + +struct myexcept { int val; }; +void doIt(); + +void test() { + try { + doIt(); + } catch (const myexcept &err) + // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: exception variable name 'err' is too short, expected at least 4 characters [readability-identifier-length] + { + doIt(); + } +} diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp new file mode 100644 index 000000000000..a11a6da0085b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp @@ -0,0 +1,14 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumLoopCounterNameLength: 4}}' \ +// RUN: -- -fexceptions + +void doIt(); + +void test() { + for (int idx = 0; idx < 5; ++idx) + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: loop variable name 'idx' is too short, expected at least 4 characters [readability-identifier-length] + { + doIt(); + } + for (int index = 0; index < 5; ++index) { doIt(); } // 5 chars, ok +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp new file mode 100644 index 000000000000..afe53896fe00 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp @@ -0,0 +1,9 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumParameterNameLength: 5}}' \ +// RUN: -- -fexceptions + +void test(int data) +// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: parameter name 'data' is too short, expected at least 5 characters [readability-identifier-length] +{ + (void)data; +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp new file mode 100644 index 
000000000000..91d95beb42f7 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp @@ -0,0 +1,11 @@ +// RUN: %check_clang_tidy %s readability-identifier-length %t \ +// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumVariableNameLength: 5}}' \ +// RUN: -- -fexceptions + +void doIt(); + +void test() { + int valu = 5; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: variable name 'valu' is too short, expected at least 5 characters [readability-identifier-length] + int value = 6; // 5 chars, ok +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp new file mode 100644 index 000000000000..109f636c0290 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp @@ -0,0 +1,8 @@ +// RUN: %check_clang_tidy %s readability-magic-numbers %t -check-suffix=IGNFP \ +// RUN: -config='{CheckOptions: {readability-magic-numbers.IgnoreAllFloatingPointValues: true}}' -- + +int BadInt = 5; +// CHECK-MESSAGES-IGNFP: :[[@LINE-1]]:14: warning: 5 is a magic number; consider replacing it with a named constant [readability-magic-numbers] + +float IgnoredFloat = 3.14f; +// CHECK-MESSAGES-IGNFP-NOT: 3.14f is a magic number diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp new file mode 100644 index 000000000000..c8ed9e09eb86 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp @@ -0,0 +1,12 @@ +// RUN: %check_clang_tidy %s readability-qualified-auto %t -- +// RUN: %check_clang_tidy %s readability-qualified-auto %t -check-suffix=NOCONST \ +// RUN: -config='{CheckOptions: { 
readability-qualified-auto.AddConstToQualified: false }}' -- + +const int *getCIntPtr(); + +void foo() { + auto *QualCPtr = getCIntPtr(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'auto *QualCPtr' can be declared as 'const auto *QualCPtr' + // CHECK-FIXES: const auto *QualCPtr = getCIntPtr(); + // No warning for NOCONST +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp new file mode 100644 index 000000000000..de0366ba1540 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp @@ -0,0 +1,16 @@ +// RUN: %check_clang_tidy %s readability-redundant-parentheses %t \ +// RUN: -config='{CheckOptions: {readability-redundant-parentheses.AllowedDecls: ""}}' + +namespace std { + template<class T> T max(T, T); + template<class T> T min(T, T); +} // namespace std + +void foo() { + (std::max)(1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant parentheses around expression [readability-redundant-parentheses] + // CHECK-FIXES: std::max(1, 2); + (std::min)(1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant parentheses around expression [readability-redundant-parentheses] + // CHECK-FIXES: std::min(1, 2); +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp index 7788feef8ce2..a6ab8fea1fab 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp @@ -1,7 +1,16 @@ // RUN: %check_clang_tidy %s readability-suspicious-call-argument %t \ // RUN: -config="{CheckOptions: {readability-suspicious-call-argument.Abbreviations: 'crash='}}" -- -std=c++11-or-later +// 
RUN: %check_clang_tidy %s readability-suspicious-call-argument %t -check-suffix=MINLEN \ +// RUN: -config='{CheckOptions: {readability-suspicious-call-argument.MinimumIdentifierNameLength: 10}}' -- -std=c++11-or-later void f() {} // CHECK-MESSAGES: warning: Invalid abbreviation configuration 'crash=', ignoring. -// TODO: Add testcases for other options +void takeTwoParams(int frobble1, int frobble2); + +void testMinimumIdentifierNameLength() { + int frobble2 = 1, frobble1 = 2; + takeTwoParams(frobble2, frobble1); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 1st argument 'frobble2' (passed to 'frobble1') looks like it might be swapped with the 2nd, 'frobble1' (passed to 'frobble2') + // No warning for MINLEN +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp new file mode 100644 index 000000000000..0e17db3fe836 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp @@ -0,0 +1,13 @@ +// RUN: %check_clang_tidy -std=c++11-or-later %s readability-use-std-min-max %t \ +// RUN: -config='{CheckOptions: {readability-use-std-min-max.IncludeStyle: "google"}}' \ +// RUN: -- -fno-delayed-template-parsing + +// CHECK-FIXES: #include <algorithm> + +void foo() { + int a = 0, b = 1; + if (a < b) + a = b; + // CHECK-MESSAGES: :[[@LINE-2]]:3: warning: use `std::max` instead of `<` [readability-use-std-min-max] + // CHECK-FIXES: a = std::max(a, b); +} diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index b438a9b250b0..e7aa6c26dfce 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -839,8 +839,10 @@ public: void VisitSYCLKernelCallStmt(const SYCLKernelCallStmt *Node) { Visit(Node->getOriginalStmt()); - if (Traversal != TK_IgnoreUnlessSpelledInSource) + if (Traversal != 
TK_IgnoreUnlessSpelledInSource) { + Visit(Node->getKernelLaunchStmt()); Visit(Node->getOutlinedFunctionDecl()); + } } void VisitOMPExecutableDirective(const OMPExecutableDirective *Node) { diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index f97b54276cbe..ce6ad723191e 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3001,6 +3001,13 @@ DEF_TRAVERSE_STMT(ParenListExpr, {}) DEF_TRAVERSE_STMT(SYCLUniqueStableNameExpr, { TRY_TO(TraverseTypeLoc(S->getTypeSourceInfo()->getTypeLoc())); }) +DEF_TRAVERSE_STMT(UnresolvedSYCLKernelCallStmt, { + if (getDerived().shouldVisitImplicitCode()) { + TRY_TO(TraverseStmt(S->getOriginalStmt())); + TRY_TO(TraverseStmt(S->getKernelLaunchIdExpr())); + ShouldVisitChildren = false; + } +}) DEF_TRAVERSE_STMT(OpenACCAsteriskSizeExpr, {}) DEF_TRAVERSE_STMT(PredefinedExpr, {}) DEF_TRAVERSE_STMT(ShuffleVectorExpr, {}) @@ -3038,6 +3045,7 @@ DEF_TRAVERSE_STMT(CapturedStmt, { TRY_TO(TraverseDecl(S->getCapturedDecl())); }) DEF_TRAVERSE_STMT(SYCLKernelCallStmt, { if (getDerived().shouldVisitImplicitCode()) { TRY_TO(TraverseStmt(S->getOriginalStmt())); + TRY_TO(TraverseStmt(S->getKernelLaunchStmt())); TRY_TO(TraverseDecl(S->getOutlinedFunctionDecl())); ShouldVisitChildren = false; } diff --git a/clang/include/clang/AST/StmtSYCL.h b/clang/include/clang/AST/StmtSYCL.h index 28ace12d7916..79ac88532e14 100644 --- a/clang/include/clang/AST/StmtSYCL.h +++ b/clang/include/clang/AST/StmtSYCL.h @@ -28,40 +28,44 @@ namespace clang { /// of such a function specifies the statements to be executed on a SYCL device /// to invoke a SYCL kernel with a particular set of kernel arguments. The /// SYCLKernelCallStmt associates an original statement (the compound statement -/// that is the function body) with an OutlinedFunctionDecl that holds the -/// kernel parameters and the transformed body. 
During code generation, the -/// OutlinedFunctionDecl is used to emit an offload kernel entry point suitable -/// for invocation from a SYCL library implementation. If executed, the -/// SYCLKernelCallStmt behaves as a no-op; no code generation is performed for -/// it. +/// that is the function body) with a kernel launch statement to execute on a +/// SYCL host and an OutlinedFunctionDecl that holds the kernel parameters and +/// the transformed body to execute on a SYCL device. During code generation, +/// the OutlinedFunctionDecl is used to emit an offload kernel entry point +/// suitable for invocation from a SYCL library implementation. class SYCLKernelCallStmt : public Stmt { friend class ASTStmtReader; friend class ASTStmtWriter; private: Stmt *OriginalStmt = nullptr; + Stmt *KernelLaunchStmt = nullptr; OutlinedFunctionDecl *OFDecl = nullptr; public: /// Construct a SYCL kernel call statement. - SYCLKernelCallStmt(CompoundStmt *CS, OutlinedFunctionDecl *OFD) - : Stmt(SYCLKernelCallStmtClass), OriginalStmt(CS), OFDecl(OFD) {} + SYCLKernelCallStmt(CompoundStmt *CS, Stmt *S, OutlinedFunctionDecl *OFD) + : Stmt(SYCLKernelCallStmtClass), OriginalStmt(CS), KernelLaunchStmt(S), + OFDecl(OFD) {} /// Construct an empty SYCL kernel call statement. SYCLKernelCallStmt(EmptyShell Empty) : Stmt(SYCLKernelCallStmtClass, Empty) {} - /// Retrieve the model statement. CompoundStmt *getOriginalStmt() { return cast<CompoundStmt>(OriginalStmt); } const CompoundStmt *getOriginalStmt() const { return cast<CompoundStmt>(OriginalStmt); } + void setOriginalStmt(CompoundStmt *CS) { OriginalStmt = CS; } - /// Retrieve the outlined function declaration. 
+ Stmt *getKernelLaunchStmt() { return KernelLaunchStmt; } + const Stmt *getKernelLaunchStmt() const { return KernelLaunchStmt; } + + void setKernelLaunchStmt(Stmt *S) { KernelLaunchStmt = S; } + OutlinedFunctionDecl *getOutlinedFunctionDecl() { return OFDecl; } const OutlinedFunctionDecl *getOutlinedFunctionDecl() const { return OFDecl; } - /// Set the outlined function declaration. void setOutlinedFunctionDecl(OutlinedFunctionDecl *OFD) { OFDecl = OFD; } SourceLocation getBeginLoc() const LLVM_READONLY { @@ -89,6 +93,66 @@ public: } }; +// UnresolvedSYCLKernelCallStmt represents an invocation of a SYCL kernel in +// a dependent context for which lookup of the sycl_kernel_launch identifier +// cannot be performed. These statements are transformed to SYCLKernelCallStmt +// during template instantiation. +class UnresolvedSYCLKernelCallStmt : public Stmt { + friend class ASTStmtReader; + friend class ASTStmtWriter; + +private: + Stmt *OriginalStmt = nullptr; + // KernelLaunchIdExpr stores an UnresolvedLookupExpr or UnresolvedMemberExpr + // corresponding to the SYCL kernel launch function for which a call + // will be synthesized during template instantiation. 
+ Expr *KernelLaunchIdExpr = nullptr; + + UnresolvedSYCLKernelCallStmt(CompoundStmt *CS, Expr *IdExpr) + : Stmt(UnresolvedSYCLKernelCallStmtClass), OriginalStmt(CS), + KernelLaunchIdExpr(IdExpr) {} + + void setOriginalStmt(CompoundStmt *CS) { OriginalStmt = CS; } + + void setKernelLaunchIdExpr(Expr *IdExpr) { KernelLaunchIdExpr = IdExpr; } + +public: + static UnresolvedSYCLKernelCallStmt *Create(const ASTContext &C, + CompoundStmt *CS, Expr *IdExpr) { + return new (C) UnresolvedSYCLKernelCallStmt(CS, IdExpr); + } + + static UnresolvedSYCLKernelCallStmt *CreateEmpty(const ASTContext &C) { + return new (C) UnresolvedSYCLKernelCallStmt(nullptr, nullptr); + } + + CompoundStmt *getOriginalStmt() { return cast<CompoundStmt>(OriginalStmt); } + const CompoundStmt *getOriginalStmt() const { + return cast<CompoundStmt>(OriginalStmt); + } + + Expr *getKernelLaunchIdExpr() { return KernelLaunchIdExpr; } + const Expr *getKernelLaunchIdExpr() const { return KernelLaunchIdExpr; } + + SourceLocation getBeginLoc() const LLVM_READONLY { + return getOriginalStmt()->getBeginLoc(); + } + + SourceLocation getEndLoc() const LLVM_READONLY { + return getOriginalStmt()->getEndLoc(); + } + static bool classof(const Stmt *T) { + return T->getStmtClass() == UnresolvedSYCLKernelCallStmtClass; + } + child_range children() { + return child_range(&OriginalStmt, &OriginalStmt + 1); + } + + const_child_range children() const { + return const_child_range(&OriginalStmt, &OriginalStmt + 1); + } +}; + } // end namespace clang #endif // LLVM_CLANG_AST_STMTSYCL_H diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 68fdc3b976d6..60dfdfc2f23f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -580,25 +580,26 @@ The following examples demonstrate the use of this attribute: def SYCLKernelEntryPointDocs : Documentation { let Category = DocCatFunction; let Content = [{ -The ``sycl_kernel_entry_point`` attribute 
facilitates the generation of an -offload kernel entry point, sometimes called a SYCL kernel caller function, -suitable for invoking a SYCL kernel on an offload device. The attribute is -intended for use in the implementation of SYCL kernel invocation functions -like the ``single_task`` and ``parallel_for`` member functions of the -``sycl::handler`` class specified in section 4.9.4, "Command group ``handler`` -class", of the SYCL 2020 specification. - -The attribute requires a single type argument that specifies a class type that -meets the requirements for a SYCL kernel name as described in section 5.2, -"Naming of kernels", of the SYCL 2020 specification. A unique kernel name type -is required for each function declared with the attribute. The attribute may -not first appear on a declaration that follows a definition of the function. +The ``sycl_kernel_entry_point`` attribute facilitates the launch of a SYCL +kernel and the generation of an offload kernel entry point, sometimes called +a SYCL kernel caller function, suitable for invoking a SYCL kernel on an +offload device. The attribute is intended for use in the implementation of +SYCL kernel invocation functions like the ``single_task`` and ``parallel_for`` +member functions of the ``sycl::handler`` class specified in section 4.9.4, +"Command group ``handler`` class", of the SYCL 2020 specification. + +The attribute requires a single type argument that meets the requirements for +a SYCL kernel name as described in section 5.2, "Naming of kernels", of the +SYCL 2020 specification. A unique kernel name type is required for each +function declared with the attribute. The attribute may not first appear on a +declaration that follows a definition of the function. The attribute only appertains to functions and only those that meet the following requirements. * Has a non-deduced ``void`` return type. -* Is not a non-static member function, constructor, or destructor. +* Is not a constructor or destructor. 
+* Is not a non-static member function with an explicit object parameter. * Is not a C variadic function. * Is not a coroutine. * Is not defined as deleted or as defaulted. @@ -613,73 +614,84 @@ follows. namespace sycl { class handler { - template<typename KernelNameType, typename KernelType> - [[ clang::sycl_kernel_entry_point(KernelNameType) ]] - static void kernel_entry_point(KernelType kernel) { - kernel(); + template<typename KernelName, typename... Ts> + void sycl_kernel_launch(const char* kernelSymbol, Ts&&... kernelArgs) { + // This code will run on the host and is responsible for calling functions + // appropriate for the desired offload backend (OpenCL, CUDA, HIP, + // Level Zero, etc...) to copy the kernel arguments denoted by kernelArgs + // to a device and to schedule an invocation of the offload kernel entry + // point denoted by kernelSymbol with the copied arguments. + } + + template<typename KernelName, typename KernelType> + [[ clang::sycl_kernel_entry_point(KernelName) ]] + void kernel_entry_point(KernelType kernelFunc) { + // This code will run on the device. The call to kernelFunc() invokes + // the SYCL kernel. + kernelFunc(); } public: - template<typename KernelNameType, typename KernelType> - void single_task(KernelType kernel) { - // Call kernel_entry_point() to trigger generation of an offload - // kernel entry point. - kernel_entry_point<KernelNameType>(kernel); - // Call functions appropriate for the desired offload backend - // (OpenCL, CUDA, HIP, Level Zero, etc...). + template<typename KernelName, typename KernelType> + void single_task(const KernelType& kernelFunc) { + // This code will run on the host. kernel_entry_point() is called to + // trigger generation of an offload kernel entry point and to schedule + // an invocation of it on a device with kernelFunc (a SYCL kernel object) + // passed as a kernel argument. 
This call will result in an implicit call + // to sycl_kernel_launch() with the symbol name for the generated offload + // kernel entry point passed as the first function argument followed by + // kernelFunc. + kernel_entry_point<KernelName>(kernelFunc); } }; } // namespace sycl -A SYCL kernel is a callable object of class type that is constructed on a host, -often via a lambda expression, and then passed to a SYCL kernel invocation -function to be executed on an offload device. A SYCL kernel invocation function -is responsible for copying the provided SYCL kernel object to an offload -device and initiating a call to it. The SYCL kernel object and its data members -constitute the parameters of an offload kernel. - -A SYCL kernel type is required to satisfy the device copyability requirements -specified in section 3.13.1, "Device copyable", of the SYCL 2020 specification. -Additionally, any data members of the kernel object type are required to satisfy -section 4.12.4, "Rules for parameter passing to kernels". For most types, these -rules require that the type is trivially copyable. However, the SYCL -specification mandates that certain special SYCL types, such as -``sycl::accessor`` and ``sycl::stream`` be device copyable even if they are not -trivially copyable. These types require special handling because they cannot -be copied to device memory as if by ``memcpy()``. Additionally, some offload -backends, OpenCL for example, require objects of some of these types to be -passed as individual arguments to the offload kernel. - -An offload kernel consists of an entry point function that declares the -parameters of the offload kernel and the set of all functions and variables that -are directly or indirectly used by the entry point function. 
- -A SYCL kernel invocation function invokes a SYCL kernel on a device by -performing the following tasks (likely with the help of an offload backend -like OpenCL): +A SYCL kernel object is a callable object of class type that is constructed on +a host, often via a lambda expression, and then passed to a SYCL kernel +invocation function to be executed on an offload device. The ``kernelFunc`` +parameters in the example code above correspond to SYCL kernel objects. + +A SYCL kernel object type is required to satisfy the device copyability +requirements specified in section 3.13.1, "Device copyable", of the SYCL 2020 +specification. Additionally, any data members of the kernel object type are +required to satisfy section 4.12.4, "Rules for parameter passing to kernels". +For most types, these rules require that the type is trivially copyable. +However, the SYCL specification mandates that certain special SYCL types, such +as ``sycl::accessor`` and ``sycl::stream``, be device copyable even if they are +not trivially copyable. These types require special handling because they cannot +necessarily be copied to device memory as if by ``memcpy()``. + +The SYCL kernel object and its data members constitute the parameters of an +offload kernel. An offload kernel consists of an offload entry point function +and the set of all functions and variables that are directly or indirectly used +by the entry point function. + +A SYCL kernel invocation function is responsible for performing the following +tasks (likely with the help of an offload backend like OpenCL): #. Identifying the offload kernel entry point to be used for the SYCL kernel. -#. Deconstructing the SYCL kernel object, if necessary, to produce the set of - offload kernel arguments required by the offload kernel entry point. +#. Validating that the SYCL kernel object type and its data members meet the + SYCL device copyability and kernel parameter requirements noted above. -#. 
Copying the offload kernel arguments to device memory. +#. Copying the SYCL kernel object and any other kernel arguments to device + memory including any special handling required for SYCL special types. #. Initiating execution of the offload kernel entry point. The offload kernel entry point for a SYCL kernel performs the following tasks: -#. Reconstituting the SYCL kernel object, if necessary, using the offload - kernel parameters. +#. Calling the ``operator()`` member function of the SYCL kernel object. -#. Calling the ``operator()`` member function of the (reconstituted) SYCL kernel - object. +The ``sycl_kernel_entry_point`` attribute facilitates or automates these tasks +by providing generation of an offload kernel entry point with a unique symbol +name, type checking of kernel argument requirements, and initiation of kernel +execution via synthesized calls to a ``sycl_kernel_launch`` template. -The ``sycl_kernel_entry_point`` attribute automates generation of an offload -kernel entry point that performs those latter tasks. The parameters and body of -a function declared with the ``sycl_kernel_entry_point`` attribute specify a -pattern from which the parameters and body of the entry point function are -derived. Consider the following call to a SYCL kernel invocation function. +A function declared with the ``sycl_kernel_entry_point`` attribute specifies +the parameters and body of an offload entry point function. Consider the +following call to the ``single_task()`` SYCL kernel invocation function assuming +an implementation similar to the one shown above. .. code-block:: c++ @@ -690,65 +702,87 @@ derived. Consider the following call to a SYCL kernel invocation function. }); } -The SYCL kernel object is the result of the lambda expression. It has two -data members corresponding to the captures of ``sout`` and ``s``. 
Since one -of these data members corresponds to a special SYCL type that must be passed -individually as an offload kernel parameter, it is necessary to decompose the -SYCL kernel object into its constituent parts; the offload kernel will have -two kernel parameters. Given a SYCL implementation that uses a -``sycl_kernel_entry_point`` attributed function like the one shown above, an -offload kernel entry point function will be generated that looks approximately +The SYCL kernel object is the result of the lambda expression. The call to +``kernel_entry_point()`` via the call to ``single_task()`` triggers the +generation of an offload kernel entry point function that looks approximately as follows. .. code-block:: c++ - void sycl-kernel-caller-for-KN(sycl::stream sout, S s) { - kernel-type kernel = { sout, s ); - kernel(); + void sycl-kernel-caller-for-KN(kernel-type kernelFunc) { + kernelFunc(); } There are a few items worthy of note: -#. The name of the generated function incorporates the SYCL kernel name, - ``KN``, that was passed as the ``KernelNameType`` template parameter to - ``kernel_entry_point()`` and provided as the argument to the - ``sycl_kernel_entry_point`` attribute. There is a one-to-one correspondence - between SYCL kernel names and offload kernel entry points. +#. ``sycl-kernel-caller-for-KN`` is an exposition only name; the actual name + generated for an entry point is an implementation detail and subject to + change. However, the name will incorporate the SYCL kernel name, ``KN``, + that was passed as the ``KernelName`` template parameter to + ``single_task()`` and eventually provided as the argument to the + ``sycl_kernel_entry_point`` attribute in order to ensure that a unique + name is generated for each entry point. There is a one-to-one correspondence + between SYCL kernel names and offload kernel entry points. #. 
The SYCL kernel is a lambda closure type and therefore has no name; ``kernel-type`` is substituted above and corresponds to the ``KernelType`` - template parameter deduced in the call to ``kernel_entry_point()``. - Lambda types cannot be declared and initialized using the aggregate - initialization syntax used above, but the intended behavior should be clear. + template parameter deduced in the call to ``single_task()``. + +#. The parameter and the call to ``kernelFunc()`` in the function body + correspond to the definition of ``kernel_entry_point()`` as called by + ``single_task()``. -#. ``S`` is a device copyable type that does not directly or indirectly contain - a data member of a SYCL special type. It therefore does not need to be - decomposed into its constituent members to be passed as a kernel argument. +#. The parameter is type checked for conformance with the SYCL device + copyability and kernel parameter requirements. -#. The depiction of the ``sycl::stream`` parameter as a single self contained - kernel parameter is an oversimplification. SYCL special types may require - additional decomposition such that the generated function might have three - or more parameters depending on how the SYCL library implementation defines - these types. +Within ``single_task()``, the call to ``kernel_entry_point()`` is effectively +replaced with a synthesized call to a ``sycl_kernel_launch`` template that +looks approximately as follows. -#. The call to ``kernel_entry_point()`` has no effect other than to trigger - emission of the entry point function. The statments that make up the body - of the function are not executed when the function is called; they are - only used in the generation of the entry point function. +.. code-block:: c++ + + sycl_kernel_launch<KN>("sycl-kernel-caller-for-KN", kernelFunc); + +There are a few items worthy of note: + +#.
Lookup for the ``sycl_kernel_launch`` template is performed as if from the + body of the (possibly instantiated) definition of ``kernel_entry_point()``. + If name lookup or overload resolution fails, the program is ill-formed. + If the selected overload is a non-static member function, then ``this`` is + passed as the implicit object parameter. + +#. Function arguments passed to ``sycl_kernel_launch()`` are passed + as if by ``std::move(x)``. + +#. The ``sycl_kernel_launch`` template is expected to be provided by the SYCL + library implementation. It is responsible for copying the kernel arguments + to device memory and for scheduling execution of the generated offload + kernel entry point identified by the symbol name passed as the first + function argument. ``sycl-kernel-caller-for-KN`` is substituted above for + the actual symbol name that would be generated for the offload kernel entry + point. It is not necessary for a function declared with the ``sycl_kernel_entry_point`` attribute to be called for the offload kernel entry point to be emitted. For inline functions and function templates, any ODR-use will suffice. For other functions, an ODR-use is not required; the offload kernel entry point will be -emitted if the function is defined. +emitted if the function is defined. In any case, a call to the function is +required for the synthesized call to ``sycl_kernel_launch()`` to occur. + +A function declared with the ``sycl_kernel_entry_point`` attribute may include +an exception specification. If a non-throwing exception specification is +present, an exception propagating from the implicit call to the +``sycl_kernel_launch`` template will result in a call to ``std::terminate()``. +Otherwise, such an exception will propagate normally. Functions declared with the ``sycl_kernel_entry_point`` attribute are not limited to the simple example shown above. 
They may have additional template parameters, declare additional function parameters, and have complex control -flow in the function body. Function parameter decomposition and reconstitution -is performed for all function parameters. The function must abide by the -language feature restrictions described in section 5.4, "Language restrictions -for device functions" in the SYCL 2020 specification. +flow in the function body. The function must abide by the language feature +restrictions described in section 5.4, "Language restrictions for device +functions" in the SYCL 2020 specification. If the function is a non-static +member function, ``this`` shall not be used in a potentially evaluated +expression. }]; } diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 7063d7c06c4c..62421e588814 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5072,6 +5072,18 @@ def HLSLResourceSampleCmpLevelZero : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLResourceGather : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_resource_gather"]; + let Attributes = [NoThrow]; + let Prototype = "void(...)"; +} + +def HLSLResourceGatherCmp : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_resource_gather_cmp"]; + let Attributes = [NoThrow]; + let Prototype = "void(...)"; +} + def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_resource_uninitializedhandle"]; let Attributes = [NoThrow]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2f8a37e50613..99ab83f5e021 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12735,8 +12735,7 @@ def note_unreachable_entity : Note< "is not %select{visible|reachable|reachable|reachable|reachable|reachable}0">; def 
ext_module_import_in_extern_c : ExtWarn< "import of C++ module '%0' appears within extern \"C\" language linkage " - "specification">, DefaultError, - InGroup<DiagGroup<"module-import-in-extern-c">>; + "specification">, InGroup<DiagGroup<"module-import-in-extern-c">>; def err_module_import_not_at_top_level_fatal : Error< "import of module '%0' appears within %1">, DefaultFatal; def ext_module_import_not_at_top_level_noop : ExtWarn< @@ -13353,19 +13352,24 @@ def warn_sycl_external_missing_on_first_decl : Warning< // SYCL kernel entry point diagnostics def err_sycl_entry_point_invalid : Error< "the %0 attribute cannot be applied to a %enum_select<InvalidSKEPReason>{" - "%NonStaticMemberFn{non-static member function}|" "%VariadicFn{variadic function}|" "%DeletedFn{deleted function}|" "%DefaultedFn{defaulted function}|" + "%Constructor{constructor}|" + "%Destructor{destructor}|" + "%Coroutine{coroutine}|" "%ConstexprFn{constexpr function}|" "%ConstevalFn{consteval function}|" "%NoreturnFn{function declared with the 'noreturn' attribute}|" - "%Coroutine{coroutine}|" - "%FunctionTryBlock{function defined with a function try block}" + "%FunctionTryBlock{function defined with a function try block}|" + "%ExplicitObjectFn{function with an explicit object parameter}|" "}1">; def err_sycl_entry_point_invalid_redeclaration : Error< "the %0 kernel name argument does not match prior" " declaration%diff{: $ vs $|}1,2">; +def err_sycl_entry_point_invalid_this : Error< + "'this' cannot be%select{| implicitly}0 used in a potentially evaluated" + " expression in the body of a function declared with the %1 attribute">; def err_sycl_kernel_name_conflict : Error< "the %0 kernel name argument conflicts with a previous declaration">; def warn_sycl_kernel_name_not_a_class_type : Warning< @@ -13381,6 +13385,18 @@ def err_sycl_entry_point_return_type : Error< def err_sycl_entry_point_deduced_return_type : Error< "the %0 attribute only applies to functions with a non-deduced 'void' return" " 
type">; +def note_sycl_runtime_defect : Note< + "this indicates a problem with the SYCL runtime header files; please consider" + " reporting this to your SYCL runtime provider">; +def note_sycl_kernel_launch_lookup_here : Note< + "in implicit call to 'sycl_kernel_launch' with template argument %0 required" + " here">; +def note_sycl_kernel_launch_overload_resolution_here : Note< + "in implicit call to 'sycl_kernel_launch' with template argument %0 and" + " function arguments %1 required here">; +def err_sycl_entry_point_device_use : Error< + "function %0 cannot be used in device code because it is declared with the" + " %1 attribute">; def warn_cuda_maxclusterrank_sm_90 : Warning< "maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring " @@ -13625,6 +13641,8 @@ def err_hlsl_push_constant_unique def err_hlsl_samplecmp_requires_float : Error<"'SampleCmp' and 'SampleCmpLevelZero' require resource to contain " "a floating point type">; +def err_hlsl_gathercmp_invalid_component + : Error<"gatherCmp%select{Red|Green|Blue|Alpha}0 operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed">; // Layout randomization diagnostics. def err_non_designated_init_used : Error< diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index cb869cc21062..b196382025c9 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -24,6 +24,7 @@ def CaseStmt : StmtNode<SwitchCase>; def DefaultStmt : StmtNode<SwitchCase>; def CapturedStmt : StmtNode<Stmt>; def SYCLKernelCallStmt : StmtNode<Stmt>; +def UnresolvedSYCLKernelCallStmt : StmtNode<Stmt>; // Break/continue. 
def LoopControlStmt : StmtNode<Stmt, 1>; diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index d206503d914f..1e3a2c9af35d 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -291,7 +291,6 @@ struct MissingFeatures { static bool handleBuiltinICEArguments() { return false; } static bool hip() { return false; } static bool incrementProfileCounter() { return false; } - static bool innermostEHScope() { return false; } static bool insertBuiltinUnpredictable() { return false; } static bool instrumentation() { return false; } static bool intrinsicElementTypeSupport() { return false; } @@ -348,6 +347,7 @@ struct MissingFeatures { static bool targetCodeGenInfoGetNullPointer() { return false; } static bool thunks() { return false; } static bool tryEmitAsConstant() { return false; } + static bool typeAwareAllocation() { return false; } static bool typeChecks() { return false; } static bool useEHCleanupForArray() { return false; } static bool vaArgABILowering() { return false; } diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index 217efa3fe756..266e0826b38f 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -197,6 +197,14 @@ class CompilerInstance : public ModuleLoader { /// Force an output buffer. std::unique_ptr<llvm::raw_pwrite_stream> OutputStream; + using GenModuleActionWrapperFunc = + std::function<std::unique_ptr<FrontendAction>( + const FrontendOptions &, std::unique_ptr<FrontendAction>)>; + + /// An optional callback function used to wrap all FrontendActions + /// produced to generate imported modules before they are executed. 
+ GenModuleActionWrapperFunc GenModuleActionWrapper; + CompilerInstance(const CompilerInstance &) = delete; void operator=(const CompilerInstance &) = delete; public: @@ -958,6 +966,14 @@ public: bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) override; + void setGenModuleActionWrapper(GenModuleActionWrapperFunc Wrapper) { + GenModuleActionWrapper = Wrapper; + } + + GenModuleActionWrapperFunc getGenModuleActionWrapper() const { + return GenModuleActionWrapper; + } + void addDependencyCollector(std::shared_ptr<DependencyCollector> Listener) { DependencyCollectors.push_back(std::move(Listener)); } diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h index 4f4d38c96114..f334f58ebd0a 100644 --- a/clang/include/clang/Sema/ScopeInfo.h +++ b/clang/include/clang/Sema/ScopeInfo.h @@ -245,6 +245,10 @@ public: /// The set of GNU address of label extension "&&label". llvm::SmallVector<AddrLabelExpr *, 4> AddrLabels; + /// An unresolved identifier lookup expression for an implicit call + /// to a SYCL kernel launch function in a dependent context. + Expr *SYCLKernelLaunchIdExpr = nullptr; + public: /// Represents a simple identification of a weak object. /// diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 13a412914f5c..5917eb0ffbfe 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1430,7 +1430,8 @@ public: /// Diagnostics that are emitted only if we discover that the given function /// must be codegen'ed. Because handling these correctly adds overhead to - /// compilation, this is currently only enabled for CUDA compilations. + /// compilation, this is currently only used for offload languages like CUDA, + /// OpenMP, and SYCL. SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags; /// CurContext - This is the current declaration context of parsing. 
@@ -13275,6 +13276,14 @@ public: /// We are performing partial ordering for template template parameters. PartialOrderingTTP, + + /// We are performing name lookup for a function template or variable + /// template named 'sycl_kernel_launch'. + SYCLKernelLaunchLookup, + + /// We are performing overload resolution for a call to a function + /// template or variable template named 'sycl_kernel_launch'. + SYCLKernelLaunchOverloadResolution, } Kind; /// Whether we're substituting into constraints. @@ -13630,6 +13639,20 @@ public: operator=(const SynthesizedFunctionScope &) = delete; }; + /// RAII object to ensure that a code synthesis context is popped on scope + /// exit. + class ScopedCodeSynthesisContext { + Sema &S; + + public: + ScopedCodeSynthesisContext(Sema &S, const CodeSynthesisContext &Ctx) + : S(S) { + S.pushCodeSynthesisContext(Ctx); + } + + ~ScopedCodeSynthesisContext() { S.popCodeSynthesisContext(); } + }; + /// List of active code synthesis contexts. /// /// This vector is treated as a stack. As synthesis of one entity requires diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index 7ae556da2bec..4980aa44c301 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -64,9 +64,38 @@ public: void handleKernelAttr(Decl *D, const ParsedAttr &AL); void handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL); + /// Issues a deferred diagnostic if use of the declaration designated + /// by 'ND' is invalid in a device context. + void CheckDeviceUseOfDecl(NamedDecl *ND, SourceLocation Loc); + void CheckSYCLExternalFunctionDecl(FunctionDecl *FD); void CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD); - StmtResult BuildSYCLKernelCallStmt(FunctionDecl *FD, CompoundStmt *Body); + + /// Builds an expression for the lookup of a 'sycl_kernel_launch' template + /// with 'KernelName' as an explicit template argument. 
Lookup is performed + /// as if from the first statement of the body of 'FD' and thus requires + /// searching the scopes that exist at parse time. This function therefore + /// requires the current semantic context to be the definition of 'FD'. In a + /// dependent context, the returned expression will be an UnresolvedLookupExpr + /// or an UnresolvedMemberExpr. In a non-dependent context, the returned + /// expression will be a DeclRefExpr or MemberExpr. If lookup fails, a null + /// error result is returned. The resulting expression is intended to be + /// passed as the 'LaunchIdExpr' argument in a call to either + /// BuildSYCLKernelCallStmt() or BuildUnresolvedSYCLKernelCallStmt() after + /// the function body has been parsed. + ExprResult BuildSYCLKernelLaunchIdExpr(FunctionDecl *FD, QualType KernelName); + + /// Builds a SYCLKernelCallStmt to wrap 'Body' and to be used as the body of + /// 'FD'. 'LaunchIdExpr' specifies the lookup result returned by a previous + /// call to BuildSYCLKernelLaunchIdExpr(). + StmtResult BuildSYCLKernelCallStmt(FunctionDecl *FD, CompoundStmt *Body, + Expr *LaunchIdExpr); + + /// Builds an UnresolvedSYCLKernelCallStmt to wrap 'Body'. 'LaunchIdExpr' + /// specifies the lookup result returned by a previous call to + /// BuildSYCLKernelLaunchIdExpr(). + StmtResult BuildUnresolvedSYCLKernelCallStmt(CompoundStmt *Body, + Expr *LaunchIdExpr); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index d72f1f9db86b..752e7fd288aa 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1618,6 +1618,9 @@ enum StmtCode { /// A SYCLKernelCallStmt record. STMT_SYCLKERNELCALL, + /// An UnresolvedSYCLKernelCallStmt record. + STMT_UNRESOLVED_SYCL_KERNEL_CALL, + /// A GCC-style AsmStmt record. 
STMT_GCCASM, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index f4ce4a7573aa..4d364fdcd550 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -600,7 +600,7 @@ void StmtPrinter::VisitCapturedStmt(CapturedStmt *Node) { } void StmtPrinter::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *Node) { - PrintStmt(Node->getOutlinedFunctionDecl()->getBody()); + PrintStmt(Node->getOriginalStmt()); } void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) { @@ -1447,6 +1447,11 @@ void StmtPrinter::VisitSYCLUniqueStableNameExpr( OS << ")"; } +void StmtPrinter::VisitUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *Node) { + PrintStmt(Node->getOriginalStmt()); +} + void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) { OS << PredefinedExpr::getIdentKindName(Node->getIdentKind()); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 623905188b2d..dc7fd352a67b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1410,6 +1410,11 @@ void StmtProfiler::VisitSYCLUniqueStableNameExpr( VisitType(S->getTypeSourceInfo()->getType()); } +void StmtProfiler::VisitUnresolvedSYCLKernelCallStmt( + const UnresolvedSYCLKernelCallStmt *S) { + VisitStmt(S); +} + void StmtProfiler::VisitPredefinedExpr(const PredefinedExpr *S) { VisitExpr(S); ID.AddInteger(llvm::to_underlying(S->getIdentKind())); diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 493891e40db5..ea215985f303 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -40,36 +40,394 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc, builder.getUInt64(scalingFactor, loc)); } -static bool aarch64SVEIntrinsicsProvenSorted = false; +//===----------------------------------------------------------------------===// +// Intrinsics maps +// +// Maps that help automate 
code-generation. +// +// TODO(cir): Share this code with ARM.cpp +//===----------------------------------------------------------------------===// +enum { + AddRetType = (1 << 0), + Add1ArgType = (1 << 1), + Add2ArgTypes = (1 << 2), + + VectorizeRetType = (1 << 3), + VectorizeArgTypes = (1 << 4), + + InventFloatType = (1 << 5), + UnsignedAlts = (1 << 6), + + Use64BitVectors = (1 << 7), + Use128BitVectors = (1 << 8), + + Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, + VectorRet = AddRetType | VectorizeRetType, + VectorRetGetArgs01 = + AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, + FpCmpzModifiers = + AddRetType | VectorizeRetType | Add1ArgType | InventFloatType +}; namespace { -struct AArch64BuiltinInfo { +struct ARMVectorIntrinsicInfo { + const char *nameHint; unsigned builtinID; unsigned llvmIntrinsic; + unsigned altLLVMIntrinsic; uint64_t typeModifier; bool operator<(unsigned rhsBuiltinID) const { return builtinID < rhsBuiltinID; } - bool operator<(const AArch64BuiltinInfo &te) const { + bool operator<(const ARMVectorIntrinsicInfo &te) const { return builtinID < te.builtinID; } }; } // end anonymous namespace -#define SVEMAP1(NameBase, llvmIntrinsic, TypeModifier) \ - {SVE::BI__builtin_sve_##NameBase, Intrinsic::llvmIntrinsic, TypeModifier} +#define NEONMAP0(NameBase) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0} + +#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier} + +#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \ + Intrinsic::AltLLVMIntrinsic, TypeModifier} + +static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { + NEONMAP0(splat_lane_v), + NEONMAP0(splat_laneq_v), + NEONMAP0(splatq_lane_v), + NEONMAP0(splatq_laneq_v), + NEONMAP1(vabs_v, aarch64_neon_abs, 0), + NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + 
NEONMAP0(vadd_v), + NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), + NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), + NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), + NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), + NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), + NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), + NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, 
Add1ArgType), + NEONMAP1(vcage_v, aarch64_neon_facge, 0), + NEONMAP1(vcageq_v, aarch64_neon_facge, 0), + NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), + NEONMAP1(vcale_v, aarch64_neon_facge, 0), + NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), + NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), + NEONMAP0(vceqz_v), + NEONMAP0(vceqzq_v), + NEONMAP0(vcgez_v), + NEONMAP0(vcgezq_v), + NEONMAP0(vcgtz_v), + NEONMAP0(vcgtzq_v), + NEONMAP0(vclez_v), + NEONMAP0(vclezq_v), + NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), + NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), + NEONMAP0(vcltz_v), + NEONMAP0(vcltzq_v), + NEONMAP1(vclz_v, ctlz, Add1ArgType), + NEONMAP1(vclzq_v, ctlz, Add1ArgType), + NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + 
NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcnt_v, ctpop, Add1ArgType), + NEONMAP1(vcntq_v, ctpop, Add1ArgType), + NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), + NEONMAP0(vcvt_f16_s16), + NEONMAP0(vcvt_f16_u16), + NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), + NEONMAP0(vcvt_f32_v), + NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), + NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_f16_s16), + NEONMAP0(vcvtq_f16_u16), + NEONMAP0(vcvtq_f32_v), + NEONMAP0(vcvtq_high_bf16_f32), + NEONMAP0(vcvtq_low_bf16_f32), + NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), + NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, + 0), + NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, + 0), + NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), + NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdot_u32, aarch64_neon_udot, 0), + NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdotq_u32, aarch64_neon_udot, 
0), + NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP0(vext_v), + NEONMAP0(vextq_v), + NEONMAP0(vfma_v), + NEONMAP0(vfmaq_v), + NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, + Add1ArgType | UnsignedAlts), + NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, + Add1ArgType | UnsignedAlts), + NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), + 
NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), + NEONMAP0(vmovl_v), + NEONMAP0(vmovn_v), + NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), + NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), + NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), + NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), + NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), + NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), + NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), + NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, + Add1ArgType | UnsignedAlts), + NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), + NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + 
NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), + NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), + NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), + NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), + NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, + Add1ArgType | UnsignedAlts), + NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), + NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), + NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, + Add1ArgType | UnsignedAlts), + NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), + NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), + NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, + Add1ArgType | UnsignedAlts), + NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f64, 
aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), + NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), + NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), + NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), + NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), + NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), + NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), + NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), + NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), + NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), + NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), + 
NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), + NEONMAP0(vshl_n_v), + NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, + Add1ArgType | UnsignedAlts), + NEONMAP0(vshll_n_v), + NEONMAP0(vshlq_n_v), + NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, + Add1ArgType | UnsignedAlts), + NEONMAP0(vshr_n_v), + NEONMAP0(vshrn_n_v), + NEONMAP0(vshrq_n_v), + NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), + NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), + NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), + NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), + NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), + NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), + NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), + NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), + NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), + NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), + NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), + NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), + NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), + NEONMAP0(vsubhn_v), + NEONMAP0(vtst_v), + NEONMAP0(vtstq_v), + NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), + NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), +}; + +#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier} #define SVEMAP2(NameBase, TypeModifier) \ - {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier} -static const AArch64BuiltinInfo aarch64SVEIntrinsicMap[] = { + {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier} +static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = { #define GET_SVE_LLVM_INTRINSIC_MAP #include "clang/Basic/arm_sve_builtin_cg.inc" #undef GET_SVE_LLVM_INTRINSIC_MAP }; -static const AArch64BuiltinInfo * 
-findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap, +static bool aarch64SIMDIntrinsicsProvenSorted = false; +static bool aarch64SVEIntrinsicsProvenSorted = false; + +// Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns +// the corresponding info struct. +static const ARMVectorIntrinsicInfo * +findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap, unsigned builtinID, bool &mapProvenSorted) { #ifndef NDEBUG @@ -79,7 +437,8 @@ findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap, } #endif - const AArch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID); + const ARMVectorIntrinsicInfo *info = + llvm::lower_bound(intrinsicMap, builtinID); if (info != intrinsicMap.end() && info->builtinID == builtinID) return info; @@ -97,30 +456,383 @@ emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder, bool scalarCmp = !isa<cir::VectorType>(src.getType()); if (!scalarCmp) { - assert(cast<cir::VectorType>(retTy).getIsScalable() && + assert(!cast<cir::VectorType>(retTy).getIsScalable() && "This is only intended for fixed-width vectors"); - // Vector retTypes are cast to i8 vectors. Recover original retType. - cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare")); + // Vector types are cast to i8 vectors. Recover original type. + src = builder.createBitcast(src, retTy); } mlir::Value zero = builder.getNullValue(src.getType(), loc); - mlir::Value cmp; - if (cir::isFPOrVectorOfFPType(src.getType())) { - cgf.cgm.errorNYI(loc, std::string("unimplemented FP compare")); - } else { - if (scalarCmp) - // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare - // result is sign- rather zero-extended when casting to the output - // retType. 
- cmp = builder.createCast( - loc, cir::CastKind::bool_to_int, - builder.createCompare(loc, cir::CmpOpKind::eq, src, zero), - builder.getSIntNTy(1)); + + if (!scalarCmp) + return builder.createVecCompare(loc, kind, src, zero); + + // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare + // result is sign- rather zero-extended when casting to the output + // retType. + mlir::Value cmp = builder.createCast( + loc, cir::CastKind::bool_to_int, + builder.createCompare(loc, kind, src, zero), builder.getSIntNTy(1)); + + return builder.createCast(loc, cir::CastKind::integral, cmp, retTy); +} + +// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone. +static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, + mlir::Location loc, + bool hasLegalHalfType = true, + bool v1Ty = false, + bool allowBFloatArgsAndRet = true) { + int isQuad = typeFlags.isQuad(); + switch (typeFlags.getEltType()) { + case NeonTypeFlags::Int8: + case NeonTypeFlags::Poly8: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty + : cgf->sInt8Ty, + v1Ty ? 1 : (8 << isQuad)); + case NeonTypeFlags::MFloat8: + cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8")); + [[fallthrough]]; + case NeonTypeFlags::Int16: + case NeonTypeFlags::Poly16: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty + : cgf->sInt16Ty, + v1Ty ? 
1 : (4 << isQuad)); + case NeonTypeFlags::BFloat16: + if (allowBFloatArgsAndRet) + cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16")); + else + cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16")); + [[fallthrough]]; + case NeonTypeFlags::Float16: + if (hasLegalHalfType) + cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); else - cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare")); + cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); + [[fallthrough]]; + case NeonTypeFlags::Int32: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty + : cgf->sInt32Ty, + v1Ty ? 1 : (2 << isQuad)); + case NeonTypeFlags::Int64: + case NeonTypeFlags::Poly64: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty + : cgf->sInt64Ty, + v1Ty ? 1 : (1 << isQuad)); + case NeonTypeFlags::Poly128: + // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. + // There is a lot of i128 and f128 API missing. + // so we use v16i8 to represent poly128 and get pattern matched. + cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128")); + [[fallthrough]]; + case NeonTypeFlags::Float32: + return cir::VectorType::get(cgf->getCIRGenModule().floatTy, + v1Ty ? 1 : (2 << isQuad)); + case NeonTypeFlags::Float64: + return cir::VectorType::get(cgf->getCIRGenModule().doubleTy, + v1Ty ? 
1 : (1 << isQuad)); } + llvm_unreachable("Unknown vector element type!"); +} - return builder.createCast(loc, cir::CastKind::integral, cmp, retTy); +static mlir::Value emitCommonNeonBuiltinExpr( + CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic, + unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier, + const CallExpr *expr, llvm::SmallVectorImpl<mlir::Value> &ops) { + + mlir::Location loc = cgf.getLoc(expr->getExprLoc()); + clang::ASTContext &ctx = cgf.getContext(); + + // Extract the trailing immediate argument that encodes the type discriminator + // for this overloaded intrinsic. + // TODO: Move to the parent code that takes care of argument processing. + const clang::Expr *arg = expr->getArg(expr->getNumArgs() - 1); + std::optional<llvm::APSInt> neonTypeConst = arg->getIntegerConstantExpr(ctx); + if (!neonTypeConst) + return nullptr; + + // Determine the type of this overloaded NEON intrinsic. + NeonTypeFlags neonType(neonTypeConst->getZExtValue()); + const bool hasLegalHalfType = cgf.getTarget().hasFastHalfType(); + + // The value of allowBFloatArgsAndRet is true for AArch64, but it should + // come from ABI info. 
+ const bool allowBFloatArgsAndRet = false; + // FIXME + // getTargetHooks().getABIInfo().allowBFloatArgsAndRet(); + + cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType, + false, allowBFloatArgsAndRet); + mlir::Type ty = vTy; + if (!ty) + return nullptr; + + switch (builtinID) { + case NEON::BI__builtin_neon_splat_lane_v: + case NEON::BI__builtin_neon_splat_laneq_v: + case NEON::BI__builtin_neon_splatq_lane_v: + case NEON::BI__builtin_neon_splatq_laneq_v: + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + case NEON::BI__builtin_neon_vabs_v: + case NEON::BI__builtin_neon_vabsq_v: + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: + case NEON::BI__builtin_neon_vaddhn_v: + case NEON::BI__builtin_neon_vcale_v: + case NEON::BI__builtin_neon_vcaleq_v: + case NEON::BI__builtin_neon_vcalt_v: + case NEON::BI__builtin_neon_vcaltq_v: + case NEON::BI__builtin_neon_vcage_v: + case NEON::BI__builtin_neon_vcageq_v: + case NEON::BI__builtin_neon_vcagt_v: + case NEON::BI__builtin_neon_vcagtq_v: + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + ctx.BuiltinInfo.getName(builtinID)); + return mlir::Value{}; + case NEON::BI__builtin_neon_vceqz_v: + case NEON::BI__builtin_neon_vceqzq_v: + return emitAArch64CompareBuiltinExpr(cgf, cgf.getBuilder(), loc, ops[0], + vTy, cir::CmpOpKind::eq); + case NEON::BI__builtin_neon_vcgez_v: + case NEON::BI__builtin_neon_vcgezq_v: + case NEON::BI__builtin_neon_vclez_v: + case NEON::BI__builtin_neon_vclezq_v: + case NEON::BI__builtin_neon_vcgtz_v: + case NEON::BI__builtin_neon_vcgtzq_v: + case NEON::BI__builtin_neon_vcltz_v: + case NEON::BI__builtin_neon_vcltzq_v: + case NEON::BI__builtin_neon_vclz_v: + case NEON::BI__builtin_neon_vclzq_v: + case NEON::BI__builtin_neon_vcvt_f32_v: + case NEON::BI__builtin_neon_vcvtq_f32_v: + case NEON::BI__builtin_neon_vcvt_f16_s16: + case NEON::BI__builtin_neon_vcvt_f16_u16: + case 
NEON::BI__builtin_neon_vcvtq_f16_s16: + case NEON::BI__builtin_neon_vcvtq_f16_u16: + case NEON::BI__builtin_neon_vcvt_n_f16_s16: + case NEON::BI__builtin_neon_vcvt_n_f16_u16: + case NEON::BI__builtin_neon_vcvtq_n_f16_s16: + case NEON::BI__builtin_neon_vcvtq_n_f16_u16: + case NEON::BI__builtin_neon_vcvt_n_f32_v: + case NEON::BI__builtin_neon_vcvt_n_f64_v: + case NEON::BI__builtin_neon_vcvtq_n_f32_v: + case NEON::BI__builtin_neon_vcvtq_n_f64_v: + case NEON::BI__builtin_neon_vcvt_n_s16_f16: + case NEON::BI__builtin_neon_vcvt_n_s32_v: + case NEON::BI__builtin_neon_vcvt_n_u16_f16: + case NEON::BI__builtin_neon_vcvt_n_u32_v: + case NEON::BI__builtin_neon_vcvt_n_s64_v: + case NEON::BI__builtin_neon_vcvt_n_u64_v: + case NEON::BI__builtin_neon_vcvtq_n_s16_f16: + case NEON::BI__builtin_neon_vcvtq_n_s32_v: + case NEON::BI__builtin_neon_vcvtq_n_u16_f16: + case NEON::BI__builtin_neon_vcvtq_n_u32_v: + case NEON::BI__builtin_neon_vcvtq_n_s64_v: + case NEON::BI__builtin_neon_vcvtq_n_u64_v: + case NEON::BI__builtin_neon_vcvt_s32_v: + case NEON::BI__builtin_neon_vcvt_u32_v: + case NEON::BI__builtin_neon_vcvt_s64_v: + case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvt_s16_f16: + case NEON::BI__builtin_neon_vcvt_u16_f16: + case NEON::BI__builtin_neon_vcvtq_s32_v: + case NEON::BI__builtin_neon_vcvtq_u32_v: + case NEON::BI__builtin_neon_vcvtq_s64_v: + case NEON::BI__builtin_neon_vcvtq_u64_v: + case NEON::BI__builtin_neon_vcvtq_s16_f16: + case NEON::BI__builtin_neon_vcvtq_u16_f16: + case NEON::BI__builtin_neon_vcvta_s16_f16: + case NEON::BI__builtin_neon_vcvta_s32_v: + case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvta_u16_f16: + case NEON::BI__builtin_neon_vcvta_u32_v: + case NEON::BI__builtin_neon_vcvta_u64_v: + case NEON::BI__builtin_neon_vcvtaq_s16_f16: + case NEON::BI__builtin_neon_vcvtaq_s32_v: + case NEON::BI__builtin_neon_vcvtaq_s64_v: + case NEON::BI__builtin_neon_vcvtaq_u16_f16: + case NEON::BI__builtin_neon_vcvtaq_u32_v: + case 
NEON::BI__builtin_neon_vcvtaq_u64_v: + case NEON::BI__builtin_neon_vcvtn_s16_f16: + case NEON::BI__builtin_neon_vcvtn_s32_v: + case NEON::BI__builtin_neon_vcvtn_s64_v: + case NEON::BI__builtin_neon_vcvtn_u16_f16: + case NEON::BI__builtin_neon_vcvtn_u32_v: + case NEON::BI__builtin_neon_vcvtn_u64_v: + case NEON::BI__builtin_neon_vcvtnq_s16_f16: + case NEON::BI__builtin_neon_vcvtnq_s32_v: + case NEON::BI__builtin_neon_vcvtnq_s64_v: + case NEON::BI__builtin_neon_vcvtnq_u16_f16: + case NEON::BI__builtin_neon_vcvtnq_u32_v: + case NEON::BI__builtin_neon_vcvtnq_u64_v: + case NEON::BI__builtin_neon_vcvtp_s16_f16: + case NEON::BI__builtin_neon_vcvtp_s32_v: + case NEON::BI__builtin_neon_vcvtp_s64_v: + case NEON::BI__builtin_neon_vcvtp_u16_f16: + case NEON::BI__builtin_neon_vcvtp_u32_v: + case NEON::BI__builtin_neon_vcvtp_u64_v: + case NEON::BI__builtin_neon_vcvtpq_s16_f16: + case NEON::BI__builtin_neon_vcvtpq_s32_v: + case NEON::BI__builtin_neon_vcvtpq_s64_v: + case NEON::BI__builtin_neon_vcvtpq_u16_f16: + case NEON::BI__builtin_neon_vcvtpq_u32_v: + case NEON::BI__builtin_neon_vcvtpq_u64_v: + case NEON::BI__builtin_neon_vcvtm_s16_f16: + case NEON::BI__builtin_neon_vcvtm_s32_v: + case NEON::BI__builtin_neon_vcvtm_s64_v: + case NEON::BI__builtin_neon_vcvtm_u16_f16: + case NEON::BI__builtin_neon_vcvtm_u32_v: + case NEON::BI__builtin_neon_vcvtm_u64_v: + case NEON::BI__builtin_neon_vcvtmq_s16_f16: + case NEON::BI__builtin_neon_vcvtmq_s32_v: + case NEON::BI__builtin_neon_vcvtmq_s64_v: + case NEON::BI__builtin_neon_vcvtmq_u16_f16: + case NEON::BI__builtin_neon_vcvtmq_u32_v: + case NEON::BI__builtin_neon_vcvtmq_u64_v: + case NEON::BI__builtin_neon_vcvtx_f32_v: + case NEON::BI__builtin_neon_vext_v: + case NEON::BI__builtin_neon_vextq_v: + case NEON::BI__builtin_neon_vfma_v: + case NEON::BI__builtin_neon_vfmaq_v: + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: + case NEON::BI__builtin_neon_vld1_x2_v: + case NEON::BI__builtin_neon_vld1q_x2_v: + case 
NEON::BI__builtin_neon_vld1_x3_v: + case NEON::BI__builtin_neon_vld1q_x3_v: + case NEON::BI__builtin_neon_vld1_x4_v: + case NEON::BI__builtin_neon_vld1q_x4_v: + case NEON::BI__builtin_neon_vld2_v: + case NEON::BI__builtin_neon_vld2q_v: + case NEON::BI__builtin_neon_vld3_v: + case NEON::BI__builtin_neon_vld3q_v: + case NEON::BI__builtin_neon_vld4_v: + case NEON::BI__builtin_neon_vld4q_v: + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: + case NEON::BI__builtin_neon_vld2_lane_v: + case NEON::BI__builtin_neon_vld2q_lane_v: + case NEON::BI__builtin_neon_vld3_lane_v: + case NEON::BI__builtin_neon_vld3q_lane_v: + case NEON::BI__builtin_neon_vld4_lane_v: + case NEON::BI__builtin_neon_vld4q_lane_v: + case NEON::BI__builtin_neon_vmovl_v: + case NEON::BI__builtin_neon_vmovn_v: + case NEON::BI__builtin_neon_vmull_v: + case NEON::BI__builtin_neon_vpadal_v: + case NEON::BI__builtin_neon_vpadalq_v: + case NEON::BI__builtin_neon_vpaddl_v: + case NEON::BI__builtin_neon_vpaddlq_v: + case NEON::BI__builtin_neon_vqdmlal_v: + case NEON::BI__builtin_neon_vqdmlsl_v: + case NEON::BI__builtin_neon_vqdmulhq_lane_v: + case NEON::BI__builtin_neon_vqdmulh_lane_v: + case NEON::BI__builtin_neon_vqrdmulhq_lane_v: + case NEON::BI__builtin_neon_vqrdmulh_lane_v: + case NEON::BI__builtin_neon_vqdmulhq_laneq_v: + case NEON::BI__builtin_neon_vqdmulh_laneq_v: + case NEON::BI__builtin_neon_vqrdmulhq_laneq_v: + case NEON::BI__builtin_neon_vqrdmulh_laneq_v: + case NEON::BI__builtin_neon_vqshl_n_v: + case NEON::BI__builtin_neon_vqshlq_n_v: + case NEON::BI__builtin_neon_vqshlu_n_v: + case NEON::BI__builtin_neon_vqshluq_n_v: + case NEON::BI__builtin_neon_vrecpe_v: + case NEON::BI__builtin_neon_vrecpeq_v: + case 
NEON::BI__builtin_neon_vrsqrte_v: + case NEON::BI__builtin_neon_vrsqrteq_v: + case NEON::BI__builtin_neon_vrndi_v: + case NEON::BI__builtin_neon_vrndiq_v: + case NEON::BI__builtin_neon_vrshr_n_v: + case NEON::BI__builtin_neon_vrshrq_n_v: + case NEON::BI__builtin_neon_vsha512hq_u64: + case NEON::BI__builtin_neon_vsha512h2q_u64: + case NEON::BI__builtin_neon_vsha512su0q_u64: + case NEON::BI__builtin_neon_vsha512su1q_u64: + case NEON::BI__builtin_neon_vshl_n_v: + case NEON::BI__builtin_neon_vshlq_n_v: + case NEON::BI__builtin_neon_vshll_n_v: + case NEON::BI__builtin_neon_vshrn_n_v: + case NEON::BI__builtin_neon_vshr_n_v: + case NEON::BI__builtin_neon_vshrq_n_v: + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + case NEON::BI__builtin_neon_vst2_v: + case NEON::BI__builtin_neon_vst2q_v: + case NEON::BI__builtin_neon_vst3_v: + case NEON::BI__builtin_neon_vst3q_v: + case NEON::BI__builtin_neon_vst4_v: + case NEON::BI__builtin_neon_vst4q_v: + case NEON::BI__builtin_neon_vst2_lane_v: + case NEON::BI__builtin_neon_vst2q_lane_v: + case NEON::BI__builtin_neon_vst3_lane_v: + case NEON::BI__builtin_neon_vst3q_lane_v: + case NEON::BI__builtin_neon_vst4_lane_v: + case NEON::BI__builtin_neon_vst4q_lane_v: + case NEON::BI__builtin_neon_vsm3partw1q_u32: + case NEON::BI__builtin_neon_vsm3partw2q_u32: + case NEON::BI__builtin_neon_vsm3ss1q_u32: + case NEON::BI__builtin_neon_vsm4ekeyq_u32: + case NEON::BI__builtin_neon_vsm4eq_u32: + case NEON::BI__builtin_neon_vsm3tt1aq_u32: + case NEON::BI__builtin_neon_vsm3tt1bq_u32: + case NEON::BI__builtin_neon_vsm3tt2aq_u32: + case NEON::BI__builtin_neon_vsm3tt2bq_u32: + case NEON::BI__builtin_neon_vst1_x2_v: + case NEON::BI__builtin_neon_vst1q_x2_v: + case NEON::BI__builtin_neon_vst1_x3_v: + case NEON::BI__builtin_neon_vst1q_x3_v: + case NEON::BI__builtin_neon_vst1_x4_v: + case NEON::BI__builtin_neon_vst1q_x4_v: + case NEON::BI__builtin_neon_vsubhn_v: + case NEON::BI__builtin_neon_vtrn_v: + case 
NEON::BI__builtin_neon_vtrnq_v: + case NEON::BI__builtin_neon_vtst_v: + case NEON::BI__builtin_neon_vtstq_v: + case NEON::BI__builtin_neon_vuzp_v: + case NEON::BI__builtin_neon_vuzpq_v: + case NEON::BI__builtin_neon_vxarq_u64: + case NEON::BI__builtin_neon_vzip_v: + case NEON::BI__builtin_neon_vzipq_v: + case NEON::BI__builtin_neon_vdot_s32: + case NEON::BI__builtin_neon_vdot_u32: + case NEON::BI__builtin_neon_vdotq_s32: + case NEON::BI__builtin_neon_vdotq_u32: + case NEON::BI__builtin_neon_vfmlal_low_f16: + case NEON::BI__builtin_neon_vfmlalq_low_f16: + case NEON::BI__builtin_neon_vfmlsl_low_f16: + case NEON::BI__builtin_neon_vfmlslq_low_f16: + case NEON::BI__builtin_neon_vfmlal_high_f16: + case NEON::BI__builtin_neon_vfmlalq_high_f16: + case NEON::BI__builtin_neon_vfmlsl_high_f16: + case NEON::BI__builtin_neon_vfmlslq_high_f16: + case NEON::BI__builtin_neon_vmmlaq_s32: + case NEON::BI__builtin_neon_vmmlaq_u32: + case NEON::BI__builtin_neon_vusmmlaq_s32: + case NEON::BI__builtin_neon_vusdot_s32: + case NEON::BI__builtin_neon_vusdotq_s32: + case NEON::BI__builtin_neon_vbfdot_f32: + case NEON::BI__builtin_neon_vbfdotq_f32: + case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: + default: + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + ctx.BuiltinInfo.getName(builtinID)); + return mlir::Value{}; + + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + ctx.BuiltinInfo.getName(builtinID)); + return mlir::Value{}; + } } // Emit an intrinsic where all operands are of the same type as the result. 
@@ -243,7 +955,7 @@ static unsigned getSVEMinEltCount(clang::SVETypeFlags::EltType sveType) { } } -// TODO: Share with OGCG +// TODO(cir): Share with OGCG constexpr unsigned sveBitsPerBlock = 128; static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, @@ -261,7 +973,7 @@ static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, /// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function /// should be kept consistent with the logic in Sema. /// TODO: Make this return false for SISD builtins. -/// TODO: Share this with ARM.cpp +/// TODO(cir): Share this with ARM.cpp static bool hasExtraNeonArgument(unsigned builtinID) { // Required by the headers included below, but not in this particular // function. @@ -290,64 +1002,6 @@ static bool hasExtraNeonArgument(unsigned builtinID) { return mask != 0; } -// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone. -static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, - mlir::Location loc, - bool hasLegalHalfType = true, - bool v1Ty = false, - bool allowBFloatArgsAndRet = true) { - int isQuad = typeFlags.isQuad(); - switch (typeFlags.getEltType()) { - case NeonTypeFlags::Int8: - case NeonTypeFlags::Poly8: - return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty - : cgf->sInt8Ty, - v1Ty ? 1 : (8 << isQuad)); - case NeonTypeFlags::MFloat8: - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8")); - [[fallthrough]]; - case NeonTypeFlags::Int16: - case NeonTypeFlags::Poly16: - return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty - : cgf->sInt16Ty, - v1Ty ? 
1 : (4 << isQuad)); - case NeonTypeFlags::BFloat16: - if (allowBFloatArgsAndRet) - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16")); - else - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16")); - [[fallthrough]]; - case NeonTypeFlags::Float16: - if (hasLegalHalfType) - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); - else - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); - [[fallthrough]]; - case NeonTypeFlags::Int32: - return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty - : cgf->sInt32Ty, - v1Ty ? 1 : (2 << isQuad)); - case NeonTypeFlags::Int64: - case NeonTypeFlags::Poly64: - return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty - : cgf->sInt64Ty, - v1Ty ? 1 : (1 << isQuad)); - case NeonTypeFlags::Poly128: - // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. - // There is a lot of i128 and f128 API missing. - // so we use v16i8 to represent poly128 and get pattern matched. - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128")); - [[fallthrough]]; - case NeonTypeFlags::Float32: - return cir::VectorType::get(cgf->getCIRGenModule().floatTy, - v1Ty ? 1 : (2 << isQuad)); - case NeonTypeFlags::Float64: - return cir::VectorType::get(cgf->getCIRGenModule().doubleTy, - v1Ty ? 1 : (1 << isQuad)); - } - llvm_unreachable("Unknown vector element type!"); -} - // TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone. template <typename Operation> static mlir::Value @@ -1584,6 +2238,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, mlir::Location loc = getLoc(expr->getExprLoc()); + // Not all intrinsics handled by the common case work for AArch64 yet, so only + // defer to common code if it's been added to our special map. 
+ const ARMVectorIntrinsicInfo *builtin; + builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID, + aarch64SIMDIntrinsicsProvenSorted); + + if (builtin) + return emitCommonNeonBuiltinExpr( + *this, builtin->builtinID, builtin->llvmIntrinsic, + builtin->altLLVMIntrinsic, builtin->nameHint, builtin->typeModifier, + expr, ops); + // Handle non-overloaded intrinsics first. switch (builtinID) { default: diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 157dc3fdd56f..61ccd85cd634 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -1021,6 +1021,16 @@ CIRGenTypes::arrangeFunctionDeclaration(const FunctionDecl *fd) { return arrangeFreeFunctionType(funcTy.castAs<FunctionProtoType>()); } +RValue CallArg::getRValue(CIRGenFunction &cgf, mlir::Location loc) const { + if (!hasLV) + return rv; + LValue copy = cgf.makeAddrLValue(cgf.createMemTemp(ty, loc), ty); + cgf.emitAggregateCopy(copy, lv, ty, AggValueSlot::DoesNotOverlap, + lv.isVolatile()); + isUsed = true; + return RValue::getAggregate(copy.getAddress()); +} + static cir::CIRCallOpInterface emitCallLikeOp(CIRGenFunction &cgf, mlir::Location callLoc, cir::FuncType indirectFuncTy, mlir::Value indirectFuncVal, diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h index 347bd4a7c826..b30b4969ca45 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.h +++ b/clang/lib/CIR/CodeGen/CIRGenCall.h @@ -202,7 +202,7 @@ private: /// A data-flow flag to make sure getRValue and/or copyInto are not /// called twice for duplicated IR emission. - [[maybe_unused]] mutable bool isUsed; + mutable bool isUsed; public: clang::QualType ty; @@ -215,6 +215,10 @@ public: bool hasLValue() const { return hasLV; } + /// \returns an independent RValue. If the CallArg contains an LValue, + /// a temporary copy is returned. 
+ RValue getRValue(CIRGenFunction &cgf, mlir::Location loc) const; + LValue getKnownLValue() const { assert(hasLV && !isUsed); return lv; diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp index bdb2947200f2..cbed8452810c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp @@ -95,7 +95,6 @@ void *EHScopeStack::pushCleanup(CleanupKind kind, size_t size) { bool isLifetimeMarker = kind & LifetimeMarker; bool skipCleanupScope = false; - assert(!cir::MissingFeatures::innermostEHScope()); cir::CleanupKind cleanupKind = cir::CleanupKind::All; if (isEHCleanup && cgf->getLangOpts().Exceptions) { cleanupKind = @@ -193,6 +192,25 @@ bool EHScopeStack::requiresCatchOrCleanup() const { return false; } +/// Deactivate a cleanup that was created in an active state. +void CIRGenFunction::deactivateCleanupBlock(EHScopeStack::stable_iterator c, + mlir::Operation *dominatingIP) { + assert(c != ehStack.stable_end() && "deactivating bottom of stack?"); + EHCleanupScope &scope = cast<EHCleanupScope>(*ehStack.find(c)); + assert(scope.isActive() && "double deactivation"); + + // If it's the top of the stack, just pop it, but do so only if it belongs + // to the current RunCleanupsScope. + if (c == ehStack.stable_begin() && + currentCleanupStackDepth.strictlyEncloses(c)) { + popCleanupBlock(); + return; + } + + // Otherwise, follow the general case. + cgm.errorNYI("deactivateCleanupBlock: setupCleanupBlockActivation"); +} + static void emitCleanup(CIRGenFunction &cgf, cir::CleanupScopeOp cleanupScope, EHScopeStack::Cleanup *cleanup, EHScopeStack::Cleanup::Flags flags) { @@ -245,10 +263,11 @@ void CIRGenFunction::popCleanupBlock() { bool hasFallthrough = fallthroughSource != nullptr && isActive; bool requiresNormalCleanup = scope.isNormalCleanup() && hasFallthrough; + bool requiresEHCleanup = scope.isEHCleanup() && hasFallthrough; // If we don't need the cleanup at all, we're done.
assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup()); - if (!requiresNormalCleanup) { + if (!requiresNormalCleanup && !requiresEHCleanup) { ehStack.popCleanup(); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 97f496c89ab0..35f74e7120b0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -20,6 +20,7 @@ #include "clang/AST/ExprObjC.h" #include "clang/Basic/OperatorKinds.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/TrailingObjects.h" using namespace clang; using namespace clang::CIRGen; @@ -647,6 +648,209 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e, return size; } +/// Emit a call to an operator new or operator delete function, as implicitly +/// created by new-expressions and delete-expressions. +static RValue emitNewDeleteCall(CIRGenFunction &cgf, + const FunctionDecl *calleeDecl, + const FunctionProtoType *calleeType, + const CallArgList &args) { + cir::CIRCallOpInterface callOrTryCall; + cir::FuncOp calleePtr = cgf.cgm.getAddrOfFunction(calleeDecl); + CIRGenCallee callee = + CIRGenCallee::forDirect(calleePtr, GlobalDecl(calleeDecl)); + RValue rv = + cgf.emitCall(cgf.cgm.getTypes().arrangeFreeFunctionCall(args, calleeType), + callee, ReturnValueSlot(), args, &callOrTryCall); + + /// C++1y [expr.new]p10: + /// [In a new-expression,] an implementation is allowed to omit a call + /// to a replaceable global allocation function. + /// + /// We model such elidable calls with the 'builtin' attribute. + assert(!cir::MissingFeatures::attributeBuiltin()); + return rv; +} + +RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, + const CallExpr *callExpr, + OverloadedOperatorKind op) { + CallArgList args; + emitCallArgs(args, type, callExpr->arguments()); + // Find the allocation or deallocation function that we're calling. 
+ ASTContext &astContext = getContext(); + assert(op == OO_New || op == OO_Delete); + DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(op); + + clang::DeclContextLookupResult lookupResult = + astContext.getTranslationUnitDecl()->lookup(name); + for (const NamedDecl *decl : lookupResult) { + if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) { + if (astContext.hasSameType(funcDecl->getType().getTypePtr(), type)) { + if (sanOpts.has(SanitizerKind::AllocToken)) { + // TODO: Set !alloc_token metadata. + assert(!cir::MissingFeatures::allocToken()); + cgm.errorNYI("Alloc token sanitizer not yet supported!"); + } + + // Emit the call to operator new/delete. + return emitNewDeleteCall(*this, funcDecl, type, args); + } + } + } + + llvm_unreachable("predeclared global operator new/delete is missing"); +} + +namespace { +template <typename Traits> struct PlacementArg { + typename Traits::RValueTy argValue; + QualType argType; +}; + +/// A cleanup to call the given 'operator delete' function upon abnormal +/// exit from a new expression. Templated on a traits type that deals with +/// ensuring that the arguments dominate the cleanup if necessary. +template <typename Traits> +class CallDeleteDuringNew final + : public EHScopeStack::Cleanup, + private llvm::TrailingObjects<CallDeleteDuringNew<Traits>, + PlacementArg<Traits>> { + using TrailingObj = + llvm::TrailingObjects<CallDeleteDuringNew<Traits>, PlacementArg<Traits>>; + friend TrailingObj; + using TrailingObj::getTrailingObjects; + + /// Type used to hold llvm::Value*s. + typedef typename Traits::ValueTy ValueTy; + /// Type used to hold RValues. 
+ typedef typename Traits::RValueTy RValueTy; + + unsigned numPlacementArgs : 30; + LLVM_PREFERRED_TYPE(AlignedAllocationMode) + unsigned passAlignmentToPlacementDelete : 1; + const FunctionDecl *operatorDelete; + ValueTy ptr; + ValueTy allocSize; + CharUnits allocAlign; + + PlacementArg<Traits> *getPlacementArgs() { return getTrailingObjects(); } + + void setPlacementArg(unsigned i, RValueTy argValue, QualType argType) { + assert(i < numPlacementArgs && "index out of range"); + getPlacementArgs()[i] = {argValue, argType}; + } + +public: + static size_t getExtraSize(size_t numPlacementArgs) { + return TrailingObj::template additionalSizeToAlloc<PlacementArg<Traits>>( + numPlacementArgs); + } + + CallDeleteDuringNew(size_t numPlacementArgs, + const FunctionDecl *operatorDelete, ValueTy ptr, + ValueTy allocSize, + const ImplicitAllocationParameters &iap, + CharUnits allocAlign, const CallArgList *newArgs, + unsigned numNonPlacementArgs, CIRGenFunction *cgf, + mlir::Location loc) + : numPlacementArgs(numPlacementArgs), + passAlignmentToPlacementDelete(isAlignedAllocation(iap.PassAlignment)), + operatorDelete(operatorDelete), ptr(ptr), allocSize(allocSize), + allocAlign(allocAlign) { + for (unsigned i = 0, n = numPlacementArgs; i != n; ++i) { + const CallArg &arg = (*newArgs)[i + numNonPlacementArgs]; + setPlacementArg(i, arg.getRValue(*cgf, loc), arg.ty); + } + } + + void emit(CIRGenFunction &cgf, Flags flags) override { + const auto *fpt = operatorDelete->getType()->castAs<FunctionProtoType>(); + CallArgList deleteArgs; + + unsigned firstNonTypeArg = 0; + TypeAwareAllocationMode typeAwareDeallocation = TypeAwareAllocationMode::No; + assert(!cir::MissingFeatures::typeAwareAllocation()); + + // The first argument after type-identity parameter (if any) is always + // a void* (or C* for a destroying operator delete for class type C). 
+ deleteArgs.add(Traits::get(cgf, ptr), fpt->getParamType(firstNonTypeArg)); + + // Figure out what other parameters we should be implicitly passing. + UsualDeleteParams params; + if (numPlacementArgs) { + // A placement deallocation function is implicitly passed an alignment + // if the placement allocation function was, but is never passed a size. + params.Alignment = + alignedAllocationModeFromBool(passAlignmentToPlacementDelete); + params.TypeAwareDelete = typeAwareDeallocation; + params.Size = isTypeAwareAllocation(params.TypeAwareDelete); + } else { + // For a non-placement new-expression, 'operator delete' can take a + // size and/or an alignment if it has the right parameters. + params = operatorDelete->getUsualDeleteParams(); + } + + assert(!params.DestroyingDelete && + "should not call destroying delete in a new-expression"); + + // The second argument can be a std::size_t (for non-placement delete). + if (params.Size) + deleteArgs.add(Traits::get(cgf, allocSize), + cgf.getContext().getSizeType()); + + // The next (second or third) argument can be a std::align_val_t, which + // is an enum whose underlying type is std::size_t. + // FIXME: Use the right type as the parameter type. Note that in a call + // to operator delete(size_t, ...), we may not have it available. + if (isAlignedAllocation(params.Alignment)) + cgf.cgm.errorNYI("CallDeleteDuringNew: aligned allocation"); + + // Pass the rest of the arguments, which must match exactly. + for (unsigned i = 0; i != numPlacementArgs; ++i) { + auto arg = getPlacementArgs()[i]; + deleteArgs.add(Traits::get(cgf, arg.argValue), arg.argType); + } + + // Call 'operator delete'. + emitNewDeleteCall(cgf, operatorDelete, fpt, deleteArgs); + } +}; +} // namespace + +/// Enter a cleanup to call 'operator delete' if the initializer in a +/// new-expression throws. 
+static void enterNewDeleteCleanup(CIRGenFunction &cgf, const CXXNewExpr *e, + Address newPtr, mlir::Value allocSize, + CharUnits allocAlign, + const CallArgList &newArgs) { + unsigned numNonPlacementArgs = e->getNumImplicitArgs(); + + // If we're not inside a conditional branch, then the cleanup will + // dominate and we can do the easier (and more efficient) thing. + if (!cgf.isInConditionalBranch()) { + struct DirectCleanupTraits { + typedef mlir::Value ValueTy; + typedef RValue RValueTy; + static RValue get(CIRGenFunction &, ValueTy v) { return RValue::get(v); } + static RValue get(CIRGenFunction &, RValueTy v) { return v; } + }; + + typedef CallDeleteDuringNew<DirectCleanupTraits> DirectCleanup; + + assert(!cir::MissingFeatures::typeAwareAllocation()); + cgf.ehStack.pushCleanupWithExtra<DirectCleanup>( + EHCleanup, e->getNumPlacementArgs(), e->getOperatorDelete(), + newPtr.getPointer(), allocSize, e->implicitAllocationParameters(), + allocAlign, &newArgs, numNonPlacementArgs, &cgf, + cgf.getLoc(e->getSourceRange())); + + return; + } + + cgf.cgm.errorNYI(e->getSourceRange(), + "enterNewDeleteCleanup: conditional branch"); +} + static void storeAnyExprIntoOneUnit(CIRGenFunction &cgf, const Expr *init, QualType allocType, Address newPtr, AggValueSlot::Overlap_t mayOverlap) { @@ -912,59 +1116,6 @@ RValue CIRGenFunction::emitCXXPseudoDestructorExpr( return RValue::get(nullptr); } -/// Emit a call to an operator new or operator delete function, as implicitly -/// created by new-expressions and delete-expressions. 
-static RValue emitNewDeleteCall(CIRGenFunction &cgf, - const FunctionDecl *calleeDecl, - const FunctionProtoType *calleeType, - const CallArgList &args) { - cir::CIRCallOpInterface callOrTryCall; - cir::FuncOp calleePtr = cgf.cgm.getAddrOfFunction(calleeDecl); - CIRGenCallee callee = - CIRGenCallee::forDirect(calleePtr, GlobalDecl(calleeDecl)); - RValue rv = - cgf.emitCall(cgf.cgm.getTypes().arrangeFreeFunctionCall(args, calleeType), - callee, ReturnValueSlot(), args, &callOrTryCall); - - /// C++1y [expr.new]p10: - /// [In a new-expression,] an implementation is allowed to omit a call - /// to a replaceable global allocation function. - /// - /// We model such elidable calls with the 'builtin' attribute. - assert(!cir::MissingFeatures::attributeBuiltin()); - return rv; -} - -RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, - const CallExpr *callExpr, - OverloadedOperatorKind op) { - CallArgList args; - emitCallArgs(args, type, callExpr->arguments()); - // Find the allocation or deallocation function that we're calling. - ASTContext &astContext = getContext(); - assert(op == OO_New || op == OO_Delete); - DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(op); - - clang::DeclContextLookupResult lookupResult = - astContext.getTranslationUnitDecl()->lookup(name); - for (const auto *decl : lookupResult) { - if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) { - if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) { - if (sanOpts.has(SanitizerKind::AllocToken)) { - // TODO: Set !alloc_token metadata. - assert(!cir::MissingFeatures::allocToken()); - cgm.errorNYI("Alloc token sanitizer not yet supported!"); - } - - // Emit the call to operator new/delete. - return emitNewDeleteCall(*this, funcDecl, type, args); - } - } - } - - llvm_unreachable("predeclared global operator new/delete is missing"); -} - namespace { /// Calls the given 'operator delete' on a single object. 
struct CallObjectDelete final : EHScopeStack::Cleanup { @@ -1190,10 +1341,24 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: null check"); // If there's an operator delete, enter a cleanup to call it if an - // exception is thrown. - if (e->getOperatorDelete() && - !e->getOperatorDelete()->isReservedGlobalPlacementOperator()) - cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: operator delete"); + // exception is thrown. If we do this, we'll be creating the result pointer + // inside a cleanup scope, either with a bitcast or an offset based on the + // array cookie size. However, we need to return that pointer from outside + // the cleanup scope, so we need to store it in a temporary variable. + bool useNewDeleteCleanup = + e->getOperatorDelete() && + !e->getOperatorDelete()->isReservedGlobalPlacementOperator(); + EHScopeStack::stable_iterator operatorDeleteCleanup; + mlir::Operation *cleanupDominator = nullptr; + if (useNewDeleteCleanup) { + assert(!cir::MissingFeatures::typeAwareAllocation()); + enterNewDeleteCleanup(*this, e, allocation, allocSize, allocAlign, + allocatorArgs); + operatorDeleteCleanup = ehStack.stable_begin(); + cleanupDominator = + cir::UnreachableOp::create(builder, getLoc(e->getSourceRange())) + .getOperation(); + } if (allocSize != allocSizeWithoutCookie) { assert(e->isArray()); @@ -1212,6 +1377,16 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { Address result = builder.createElementBitCast(getLoc(e->getSourceRange()), allocation, elementTy); + // If we're inside a new delete cleanup, store the result pointer. 
+ Address resultPtr = Address::invalid(); + if (useNewDeleteCleanup) { + resultPtr = + createTempAlloca(builder.getPointerTo(elementTy), result.getAlignment(), + getLoc(e->getSourceRange()), "__new_result"); + builder.createStore(getLoc(e->getSourceRange()), result.getPointer(), + resultPtr); + } + // Passing pointer through launder.invariant.group to avoid propagation of // vptrs information which may be included in previous type. // To not break LTO with different optimizations levels, we do it regardless @@ -1224,6 +1399,21 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { emitNewInitializer(*this, e, allocType, elementTy, result, numElements, allocSizeWithoutCookie); + + // Deactivate the 'operator delete' cleanup if we finished + // initialization. + if (useNewDeleteCleanup) { + assert(operatorDeleteCleanup.isValid()); + assert(resultPtr.isValid()); + deactivateCleanupBlock(operatorDeleteCleanup, cleanupDominator); + cleanupDominator->erase(); + cir::LoadOp loadResult = + builder.createLoad(getLoc(e->getSourceRange()), resultPtr); + result = result.withPointer(loadResult.getResult()); + } + + assert(!cir::MissingFeatures::exprNewNullCheck()); + return result.getPointer(); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 539d7839d1df..0e82958ef6f3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -970,6 +970,16 @@ public: ArrayRef<mlir::Value *> valuesToReload = {}); void popCleanupBlock(); + /// Deactivates the given cleanup block. The block cannot be reactivated. Pops + /// it if it's the top of the stack. + /// + /// \param dominatingIP - An instruction which is known to + /// dominate the current IP (if set) and which lies along + /// all paths of execution between the current IP and + /// the point at which the cleanup comes into scope.
+ void deactivateCleanupBlock(EHScopeStack::stable_iterator cleanup, + mlir::Operation *dominatingIP); + /// Push a cleanup to be run at the end of the current full-expression. Safe /// against the possibility that we're currently inside a /// conditionally-evaluated expression. diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h index 9d614c858dbe..09b78820a258 100644 --- a/clang/lib/CIR/CodeGen/EHScopeStack.h +++ b/clang/lib/CIR/CodeGen/EHScopeStack.h @@ -187,6 +187,25 @@ public: [[maybe_unused]] Cleanup *obj = new (buffer) T(a...); } + /// Push a cleanup with non-constant storage requirements on the + /// stack. The cleanup type must provide an additional static method: + /// static size_t getExtraSize(size_t); + /// The argument to this method will be the value N, which will also + /// be passed as the first argument to the constructor. + /// + /// The data stored in the extra storage must obey the same + /// restrictions as normal cleanup member data. + /// + /// The pointer returned from this method is valid until the cleanup + /// stack is modified. + template <class T, class... As> + T *pushCleanupWithExtra(CleanupKind kind, size_t n, As... a) { + static_assert(alignof(T) <= ScopeStackAlignment, + "Cleanup's alignment is too large."); + void *buffer = pushCleanup(kind, sizeof(T) + T::getExtraSize(n)); + return new (buffer) T(n, a...); + } + void setCGF(CIRGenFunction *inCGF) { cgf = inCGF; } /// Pops a cleanup scope off the stack. This is private to CIRGenCleanup.cpp. 
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 47b7e2b18d94..d6687b8e295e 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -662,6 +662,54 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return Builder.CreateIntrinsic( RetTy, CGM.getHLSLRuntime().getSampleCmpLevelZeroIntrinsic(), Args); } + case Builtin::BI__builtin_hlsl_resource_gather: { + Value *HandleOp = EmitScalarExpr(E->getArg(0)); + Value *SamplerOp = EmitScalarExpr(E->getArg(1)); + Value *CoordOp = EmitScalarExpr(E->getArg(2)); + Value *ComponentOp = EmitScalarExpr(E->getArg(3)); + if (ComponentOp->getType() != Builder.getInt32Ty()) + ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(), + /*isSigned=*/false); + + SmallVector<Value *, 5> Args; + Args.push_back(HandleOp); + Args.push_back(SamplerOp); + Args.push_back(CoordOp); + Args.push_back(ComponentOp); + Args.push_back(emitHlslOffset(*this, E, 4)); + + llvm::Type *RetTy = ConvertType(E->getType()); + return Builder.CreateIntrinsic( + RetTy, CGM.getHLSLRuntime().getGatherIntrinsic(), Args); + } + case Builtin::BI__builtin_hlsl_resource_gather_cmp: { + Value *HandleOp = EmitScalarExpr(E->getArg(0)); + Value *SamplerOp = EmitScalarExpr(E->getArg(1)); + Value *CoordOp = EmitScalarExpr(E->getArg(2)); + Value *CompareOp = EmitScalarExpr(E->getArg(3)); + if (CompareOp->getType() != Builder.getFloatTy()) + CompareOp = Builder.CreateFPCast(CompareOp, Builder.getFloatTy()); + + SmallVector<Value *, 6> Args; + Args.push_back(HandleOp); + Args.push_back(SamplerOp); + Args.push_back(CoordOp); + Args.push_back(CompareOp); + + if (CGM.getTarget().getTriple().isDXIL()) { + Value *ComponentOp = EmitScalarExpr(E->getArg(4)); + if (ComponentOp->getType() != Builder.getInt32Ty()) + ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(), + /*isSigned=*/false); + Args.push_back(ComponentOp); + } + + 
Args.push_back(emitHlslOffset(*this, E, 5)); + + llvm::Type *RetTy = ConvertType(E->getType()); + return Builder.CreateIntrinsic( + RetTy, CGM.getHLSLRuntime().getGatherCmpIntrinsic(), Args); + } case Builtin::BI__builtin_hlsl_resource_load_with_status: case Builtin::BI__builtin_hlsl_resource_load_with_status_typed: { Value *HandleOp = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index aa5fed1ad751..466c809fdef7 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -175,6 +175,8 @@ public: GENERATE_HLSL_INTRINSIC_FUNCTION(SampleCmpClamp, resource_samplecmp_clamp) GENERATE_HLSL_INTRINSIC_FUNCTION(SampleCmpLevelZero, resource_samplecmplevelzero) + GENERATE_HLSL_INTRINSIC_FUNCTION(Gather, resource_gather) + GENERATE_HLSL_INTRINSIC_FUNCTION(GatherCmp, resource_gather_cmp) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, resource_handlefrombinding) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding, diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 0658ecc93d88..ad31ecc75b01 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtSYCL.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticSema.h" @@ -99,6 +100,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::SEHExceptStmtClass: case Stmt::SEHFinallyStmtClass: case Stmt::MSDependentExistsStmtClass: + case Stmt::UnresolvedSYCLKernelCallStmtClass: llvm_unreachable("invalid statement class to emit generically"); case Stmt::NullStmtClass: case Stmt::CompoundStmtClass: @@ -543,21 +545,7 @@ bool CodeGenFunction::EmitSimpleStmt(const Stmt *S, EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S)); break; case Stmt::SYCLKernelCallStmtClass: - // SYCL 
kernel call statements are generated as wrappers around the body - // of functions declared with the sycl_kernel_entry_point attribute. Such - // functions are used to specify how a SYCL kernel (a function object) is - // to be invoked; the SYCL kernel call statement contains a transformed - // variation of the function body and is used to generate a SYCL kernel - // caller function; a function that serves as the device side entry point - // used to execute the SYCL kernel. The sycl_kernel_entry_point attributed - // function is invoked by host code in order to trigger emission of the - // device side SYCL kernel caller function and to generate metadata needed - // by SYCL run-time library implementations; the function is otherwise - // intended to have no effect. As such, the function body is not evaluated - // as part of the invocation during host compilation (and the function - // should not be called or emitted during device compilation); the SYCL - // kernel call statement is thus handled as a null statement for the - // purpose of code generation. 
+ EmitSYCLKernelCallStmt(cast<SYCLKernelCallStmt>(*S)); break; } return true; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ae2956eeac57..c3d470b179dc 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3675,6 +3675,8 @@ public: LValue EmitCoyieldLValue(const CoyieldExpr *E); RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID); + void EmitSYCLKernelCallStmt(const SYCLKernelCallStmt &S); + void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); void ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); diff --git a/clang/lib/CodeGen/CodeGenSYCL.cpp b/clang/lib/CodeGen/CodeGenSYCL.cpp index 7d66d96ad0a1..5a52675de299 100644 --- a/clang/lib/CodeGen/CodeGenSYCL.cpp +++ b/clang/lib/CodeGen/CodeGenSYCL.cpp @@ -13,10 +13,23 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include <cassert> using namespace clang; using namespace CodeGen; +void CodeGenFunction::EmitSYCLKernelCallStmt(const SYCLKernelCallStmt &S) { + // SYCLKernelCallStmt instances are only injected in the definitions of + // functions declared with the sycl_kernel_entry_point attribute. ODR-use of + // such a function in code emitted during device compilation should be + // diagnosed. Thus, any attempt to emit a SYCLKernelCallStmt during device + // compilation indicates a missing diagnostic. + assert(!getLangOpts().SYCLIsDevice && + "Attempt to emit a SYCL kernel call statement during device" + " compilation"); + EmitStmt(S.getKernelLaunchStmt()); +} + static void SetSYCLKernelAttributes(llvm::Function *Fn, CodeGenFunction &CGF) { // SYCL 2020 device language restrictions require forward progress and // disallow recursion. 
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index bff1ed3d2ec1..72d5cb804011 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -89,6 +89,11 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion; + // Do not emit __oclc_ABI_version references with non-empty environment. + if (Cov == CodeObjectVersionKind::COV_None && + CGF.getTarget().getTriple().hasEnvironment()) + Cov = CodeObjectVersionKind::COV_6; + if (Cov == CodeObjectVersionKind::COV_None) { StringRef Name = "__oclc_ABI_version"; auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 45c717d6c5ba..cdee440a5c60 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -534,6 +534,11 @@ Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, return Builder.CreateAShr(Vec, Shift, name); } +//===----------------------------------------------------------------------===// +// Intrinsics maps +// +// Maps that help automate code-generation. +//===----------------------------------------------------------------------===// enum { AddRetType = (1 << 0), Add1ArgType = (1 << 1), @@ -556,6 +561,12 @@ enum { AddRetType | VectorizeRetType | Add1ArgType | InventFloatType }; +//===----------------------------------------------------------------------===// +// Intrinsic maps +// +// Maps that help automate code-generation. 
+//===----------------------------------------------------------------------===// + namespace { struct ARMVectorIntrinsicInfo { const char *NameHint; @@ -1654,6 +1665,8 @@ static bool AArch64SISDIntrinsicsProvenSorted = false; static bool AArch64SVEIntrinsicsProvenSorted = false; static bool AArch64SMEIntrinsicsProvenSorted = false; +// Check if Builtin `BuiltinId` is present in `IntrinsicMap`. If yes, returns +// the corresponding info struct. static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted) { @@ -1783,7 +1796,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch) { - // Get the last argument, which specifies the vector type. + + // Extract the trailing immediate argument that encodes the type discriminator + // for this overloaded intrinsic. + // TODO: Move to the parent code that takes care of argument processing. const Expr *Arg = E->getArg(E->getNumArgs() - 1); std::optional<llvm::APSInt> NeonTypeConst = Arg->getIntegerConstantExpr(getContext()); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index afa1884d94b7..420340aaab88 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3082,7 +3082,7 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, InputTypeArg->claim(); // stdin must be handled specially. 
- if (memcmp(Value, "-", 2) == 0) { + if (strcmp(Value, "-") == 0) { if (IsFlangMode()) { Ty = types::TY_Fortran; } else if (IsDXCMode()) { diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index dd774f7319bb..60914d9b2cbc 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1214,6 +1214,9 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompileImpl( // Make a copy for the new instance. Instance.FailedModules = FailedModules; + // Pass along the GenModuleActionWrapper callback. + Instance.setGenModuleActionWrapper(getGenModuleActionWrapper()); + if (GetDependencyDirectives) Instance.GetDependencyDirectives = GetDependencyDirectives->cloneFor(Instance.getFileManager()); @@ -1268,8 +1271,14 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc, // thread so that we get a stack large enough. bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack( [&]() { - GenerateModuleFromModuleMapAction Action; - Instance.ExecuteAction(Action); + std::unique_ptr<FrontendAction> Action = + std::make_unique<GenerateModuleFromModuleMapAction>(); + + if (auto WrapGenModuleAction = Instance.getGenModuleActionWrapper()) + Action = WrapGenModuleAction(Instance.getFrontendOpts(), + std::move(Action)); + + Instance.ExecuteAction(*Action); }, DesiredStackSize); diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index f03b14058db4..492f7b1742be 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -476,6 +476,10 @@ private: return "TypeAliasTemplateInstantiation"; case CodeSynthesisContext::PartialOrderingTTP: return "PartialOrderingTTP"; + case CodeSynthesisContext::SYCLKernelLaunchLookup: + return "SYCLKernelLaunchLookup"; + case CodeSynthesisContext::SYCLKernelLaunchOverloadResolution: + return "SYCLKernelLaunchOverloadResolution"; } return ""; } diff --git 
a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp index 91b34838d572..f99c16c8fe92 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp @@ -16,6 +16,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/HLSLResource.h" #include "clang/AST/Stmt.h" @@ -1304,7 +1305,7 @@ BuiltinTypeDeclBuilder & BuiltinTypeDeclBuilder::addSampleMethods(ResourceDimension Dim) { assert(!Record->isCompleteDefinition() && "record is already complete"); ASTContext &AST = Record->getASTContext(); - QualType ReturnType = getFirstTemplateTypeParam(); + QualType ReturnType = getHandleElementType(); QualType SamplerStateType = lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext()); uint32_t VecSize = getResourceDimensions(Dim); @@ -1352,7 +1353,7 @@ BuiltinTypeDeclBuilder & BuiltinTypeDeclBuilder::addSampleBiasMethods(ResourceDimension Dim) { assert(!Record->isCompleteDefinition() && "record is already complete"); ASTContext &AST = Record->getASTContext(); - QualType ReturnType = getFirstTemplateTypeParam(); + QualType ReturnType = getHandleElementType(); QualType SamplerStateType = lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext()); uint32_t VecSize = getResourceDimensions(Dim); @@ -1404,7 +1405,7 @@ BuiltinTypeDeclBuilder & BuiltinTypeDeclBuilder::addSampleGradMethods(ResourceDimension Dim) { assert(!Record->isCompleteDefinition() && "record is already complete"); ASTContext &AST = Record->getASTContext(); - QualType ReturnType = getFirstTemplateTypeParam(); + QualType ReturnType = getHandleElementType(); QualType SamplerStateType = lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext()); uint32_t VecSize = getResourceDimensions(Dim); @@ -1461,7 +1462,7 @@ BuiltinTypeDeclBuilder & 
BuiltinTypeDeclBuilder::addSampleLevelMethods(ResourceDimension Dim) { assert(!Record->isCompleteDefinition() && "record is already complete"); ASTContext &AST = Record->getASTContext(); - QualType ReturnType = getFirstTemplateTypeParam(); + QualType ReturnType = getHandleElementType(); QualType SamplerStateType = lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext()); uint32_t VecSize = getResourceDimensions(Dim); @@ -1588,6 +1589,129 @@ BuiltinTypeDeclBuilder::addSampleCmpLevelZeroMethods(ResourceDimension Dim) { .finalize(); } +QualType BuiltinTypeDeclBuilder::getGatherReturnType() { + ASTContext &AST = SemaRef.getASTContext(); + QualType T = getHandleElementType(); + if (T.isNull()) + return QualType(); + + if (const auto *VT = T->getAs<VectorType>()) + T = VT->getElementType(); + else if (const auto *DT = T->getAs<DependentSizedExtVectorType>()) + T = DT->getElementType(); + + return AST.getExtVectorType(T, 4); +} + +BuiltinTypeDeclBuilder & +BuiltinTypeDeclBuilder::addGatherMethods(ResourceDimension Dim) { + assert(!Record->isCompleteDefinition() && "record is already complete"); + ASTContext &AST = Record->getASTContext(); + QualType ReturnType = getGatherReturnType(); + + QualType SamplerStateType = + lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext()); + uint32_t VecSize = getResourceDimensions(Dim); + QualType LocationTy = AST.FloatTy; + QualType Float2Ty = AST.getExtVectorType(LocationTy, VecSize); + QualType IntTy = AST.IntTy; + QualType OffsetTy = AST.getExtVectorType(IntTy, VecSize); + using PH = BuiltinTypeMethodBuilder::PlaceHolder; + + // Overloads for Gather, GatherRed, GatherGreen, GatherBlue, GatherAlpha + struct GatherVariant { + const char *Name; + int Component; + }; + GatherVariant Variants[] = {{"Gather", 0}, + {"GatherRed", 0}, + {"GatherGreen", 1}, + {"GatherBlue", 2}, + {"GatherAlpha", 3}}; + + for (const auto &V : Variants) { + // ret GatherVariant(SamplerState s, float2 location) + 
BuiltinTypeMethodBuilder(*this, V.Name, ReturnType) + .addParam("Sampler", SamplerStateType) + .addParam("Location", Float2Ty) + .accessHandleFieldOnResource(PH::_0) + .callBuiltin("__builtin_hlsl_resource_gather", ReturnType, PH::Handle, + PH::LastStmt, PH::_1, + getConstantUnsignedIntExpr(V.Component)) + .finalize(); + + // ret GatherVariant(SamplerState s, float2 location, int2 offset) + BuiltinTypeMethodBuilder(*this, V.Name, ReturnType) + .addParam("Sampler", SamplerStateType) + .addParam("Location", Float2Ty) + .addParam("Offset", OffsetTy) + .accessHandleFieldOnResource(PH::_0) + .callBuiltin("__builtin_hlsl_resource_gather", ReturnType, PH::Handle, + PH::LastStmt, PH::_1, + getConstantUnsignedIntExpr(V.Component), PH::_2) + .finalize(); + } + + return *this; +} + +BuiltinTypeDeclBuilder & +BuiltinTypeDeclBuilder::addGatherCmpMethods(ResourceDimension Dim) { + assert(!Record->isCompleteDefinition() && "record is already complete"); + ASTContext &AST = Record->getASTContext(); + QualType ReturnType = AST.getExtVectorType(AST.FloatTy, 4); + + QualType SamplerComparisonStateType = lookupBuiltinType( + SemaRef, "SamplerComparisonState", Record->getDeclContext()); + uint32_t VecSize = getResourceDimensions(Dim); + QualType FloatTy = AST.FloatTy; + QualType Float2Ty = AST.getExtVectorType(FloatTy, VecSize); + QualType IntTy = AST.IntTy; + QualType Int2Ty = AST.getExtVectorType(IntTy, VecSize); + using PH = BuiltinTypeMethodBuilder::PlaceHolder; + + // Overloads for GatherCmp, GatherCmpRed, GatherCmpGreen, GatherCmpBlue, + // GatherCmpAlpha + struct GatherVariant { + const char *Name; + int Component; + }; + GatherVariant Variants[] = {{"GatherCmp", 0}, + {"GatherCmpRed", 0}, + {"GatherCmpGreen", 1}, + {"GatherCmpBlue", 2}, + {"GatherCmpAlpha", 3}}; + + for (const auto &V : Variants) { + // ret GatherCmpVariant(SamplerComparisonState s, float2 location, float + // compare_value) + BuiltinTypeMethodBuilder(*this, V.Name, ReturnType) + .addParam("Sampler", 
SamplerComparisonStateType) + .addParam("Location", Float2Ty) + .addParam("CompareValue", FloatTy) + .accessHandleFieldOnResource(PH::_0) + .callBuiltin("__builtin_hlsl_resource_gather_cmp", ReturnType, + PH::Handle, PH::LastStmt, PH::_1, PH::_2, + getConstantUnsignedIntExpr(V.Component)) + .finalize(); + + // ret GatherCmpVariant(SamplerComparisonState s, float2 location, float + // compare_value, int2 offset) + BuiltinTypeMethodBuilder(*this, V.Name, ReturnType) + .addParam("Sampler", SamplerComparisonStateType) + .addParam("Location", Float2Ty) + .addParam("CompareValue", FloatTy) + .addParam("Offset", Int2Ty) + .accessHandleFieldOnResource(PH::_0) + .callBuiltin("__builtin_hlsl_resource_gather_cmp", ReturnType, + PH::Handle, PH::LastStmt, PH::_1, PH::_2, + getConstantUnsignedIntExpr(V.Component), PH::_3) + .finalize(); + } + + return *this; +} + FieldDecl *BuiltinTypeDeclBuilder::getResourceHandleField() const { auto I = Fields.find("__handle"); assert(I != Fields.end() && @@ -1616,6 +1740,14 @@ QualType BuiltinTypeDeclBuilder::getFirstTemplateTypeParam() { QualType BuiltinTypeDeclBuilder::getHandleElementType() { if (Template) return getFirstTemplateTypeParam(); + + if (auto *PartialSpec = + dyn_cast<ClassTemplatePartialSpecializationDecl>(Record)) { + const auto &Args = PartialSpec->getTemplateArgs(); + if (Args.size() > 0 && Args[0].getKind() == TemplateArgument::Type) + return Args[0].getAsType(); + } + // TODO: Should we default to VoidTy? Using `i8` is arguably ambiguous. 
return SemaRef.getASTContext().Char8Ty; } @@ -1642,6 +1774,13 @@ Expr *BuiltinTypeDeclBuilder::getConstantIntExpr(int value) { SourceLocation()); } +Expr *BuiltinTypeDeclBuilder::getConstantUnsignedIntExpr(unsigned value) { + ASTContext &AST = SemaRef.getASTContext(); + return IntegerLiteral::Create( + AST, llvm::APInt(AST.getTypeSize(AST.UnsignedIntTy), value), + AST.UnsignedIntTy, SourceLocation()); +} + BuiltinTypeDeclBuilder & BuiltinTypeDeclBuilder::addSimpleTemplateParams(ArrayRef<StringRef> Names, ConceptDecl *CD = nullptr) { diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h index fcb61731c541..c27ff30c6ff7 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h @@ -100,6 +100,8 @@ public: BuiltinTypeDeclBuilder &addSampleLevelMethods(ResourceDimension Dim); BuiltinTypeDeclBuilder &addSampleCmpMethods(ResourceDimension Dim); BuiltinTypeDeclBuilder &addSampleCmpLevelZeroMethods(ResourceDimension Dim); + BuiltinTypeDeclBuilder &addGatherMethods(ResourceDimension Dim); + BuiltinTypeDeclBuilder &addGatherCmpMethods(ResourceDimension Dim); BuiltinTypeDeclBuilder &addIncrementCounterMethod(); BuiltinTypeDeclBuilder &addDecrementCounterMethod(); BuiltinTypeDeclBuilder &addHandleAccessFunction(DeclarationName &Name, @@ -132,11 +134,13 @@ private: BuiltinTypeDeclBuilder & addCounterHandleMember(ResourceClass RC, bool IsROV, bool RawBuffer, AccessSpecifier Access = AccessSpecifier::AS_private); + QualType getGatherReturnType(); FieldDecl *getResourceHandleField() const; FieldDecl *getResourceCounterHandleField() const; QualType getFirstTemplateTypeParam(); QualType getHandleElementType(); Expr *getConstantIntExpr(int value); + Expr *getConstantUnsignedIntExpr(unsigned value); HLSLAttributedResourceType::Attributes getResourceAttrs() const; }; diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 
662627901539..788a129ec539 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -15,12 +15,14 @@ #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaHLSL.h" +#include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/SmallVector.h" using namespace clang; @@ -265,7 +267,72 @@ static BuiltinTypeDeclBuilder setupTextureType(CXXRecordDecl *Decl, Sema &S, .addSampleGradMethods(Dim) .addSampleLevelMethods(Dim) .addSampleCmpMethods(Dim) - .addSampleCmpLevelZeroMethods(Dim); + .addSampleCmpLevelZeroMethods(Dim) + .addGatherMethods(Dim) + .addGatherCmpMethods(Dim); +} + +// Add a partial specialization for a template. The `TextureTemplate` is +// `Texture<element_type>`, and it will be specialized for vectors: +// `Texture<vector<element_type, element_count>>`. +static ClassTemplatePartialSpecializationDecl * +addVectorTexturePartialSpecialization(Sema &S, NamespaceDecl *HLSLNamespace, + ClassTemplateDecl *TextureTemplate) { + ASTContext &AST = S.getASTContext(); + + // Create the template parameters: element_type and element_count. + auto *ElementType = TemplateTypeParmDecl::Create( + AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 0, + &AST.Idents.get("element_type"), false, false); + auto *ElementCount = NonTypeTemplateParmDecl::Create( + AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 1, + &AST.Idents.get("element_count"), AST.IntTy, false, + AST.getTrivialTypeSourceInfo(AST.IntTy)); + + auto *TemplateParams = TemplateParameterList::Create( + AST, SourceLocation(), SourceLocation(), {ElementType, ElementCount}, + SourceLocation(), nullptr); + + // Create the dependent vector type: vector<element_type, element_count>. 
+ QualType VectorType = AST.getDependentSizedExtVectorType( + AST.getTemplateTypeParmType(0, 0, false, ElementType), + DeclRefExpr::Create( + AST, NestedNameSpecifierLoc(), SourceLocation(), ElementCount, false, + DeclarationNameInfo(ElementCount->getDeclName(), SourceLocation()), + AST.IntTy, VK_LValue), + SourceLocation()); + + // Create the partial specialization declaration. + QualType CanonInjectedTST = + AST.getCanonicalType(AST.getTemplateSpecializationType( + ElaboratedTypeKeyword::Class, TemplateName(TextureTemplate), + {TemplateArgument(VectorType)}, {})); + + auto *PartialSpec = ClassTemplatePartialSpecializationDecl::Create( + AST, TagDecl::TagKind::Class, HLSLNamespace, SourceLocation(), + SourceLocation(), TemplateParams, TextureTemplate, + {TemplateArgument(VectorType)}, + CanQualType::CreateUnsafe(CanonInjectedTST), nullptr); + + // Set the template arguments as written. + TemplateArgument Arg(VectorType); + TemplateArgumentLoc ArgLoc = + S.getTrivialTemplateArgumentLoc(Arg, QualType(), SourceLocation()); + TemplateArgumentListInfo ArgsInfo = + TemplateArgumentListInfo(SourceLocation(), SourceLocation()); + ArgsInfo.addArgument(ArgLoc); + PartialSpec->setTemplateArgsAsWritten( + ASTTemplateArgumentListInfo::Create(AST, ArgsInfo)); + + PartialSpec->setImplicit(true); + PartialSpec->setLexicalDeclContext(HLSLNamespace); + PartialSpec->setHasExternalLexicalStorage(); + + // Add the partial specialization to the namespace and the class template. 
+ HLSLNamespace->addDecl(PartialSpec); + TextureTemplate->AddPartialSpecialization(PartialSpec, nullptr); + + return PartialSpec; } // This function is responsible for constructing the constraint expression for @@ -548,11 +615,20 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "Texture2D") .addSimpleTemplateParams({"element_type"}, TypedBufferConcept) .finalizeForwardDeclaration(); + onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupTextureType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false, ResourceDimension::Dim2D) .completeDefinition(); }); + + auto *PartialSpec = addVectorTexturePartialSpecialization( + *SemaPtr, HLSLNamespace, Decl->getDescribedClassTemplate()); + onCompletion(PartialSpec, [this](CXXRecordDecl *Decl) { + setupTextureType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false, + ResourceDimension::Dim2D) + .completeDefinition(); + }); } void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record, @@ -568,8 +644,27 @@ void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) { // If this is a specialization, we need to get the underlying templated // declaration and complete that. 
- if (auto TDecl = dyn_cast<ClassTemplateSpecializationDecl>(Record)) - Record = TDecl->getSpecializedTemplate()->getTemplatedDecl(); + if (auto TDecl = dyn_cast<ClassTemplateSpecializationDecl>(Record)) { + if (!isa<ClassTemplatePartialSpecializationDecl>(TDecl)) { + ClassTemplateDecl *Template = TDecl->getSpecializedTemplate(); + llvm::SmallVector<ClassTemplatePartialSpecializationDecl *, 4> Partials; + Template->getPartialSpecializations(Partials); + ClassTemplatePartialSpecializationDecl *MatchedPartial = nullptr; + for (auto *Partial : Partials) { + sema::TemplateDeductionInfo Info(TDecl->getLocation()); + if (SemaPtr->DeduceTemplateArguments(Partial, TDecl->getTemplateArgs(), + Info) == + TemplateDeductionResult::Success) { + MatchedPartial = Partial; + break; + } + } + if (MatchedPartial) + Record = MatchedPartial; + else + Record = Template->getTemplatedDecl(); + } + } Record = Record->getCanonicalDecl(); auto It = Completions.find(Record); if (It == Completions.end()) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index be84974c70f2..405832a446e1 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -16360,6 +16360,32 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D, maybeAddDeclWithEffects(FD); + if (FD && !FD->isInvalidDecl() && FD->hasAttr<SYCLKernelEntryPointAttr>() && + FnBodyScope) { + // An implicit call expression is synthesized for functions declared with + // the sycl_kernel_entry_point attribute. The call may resolve to a + // function template, a member function template, or a call operator + // of a variable template depending on the results of unqualified lookup + // for 'sycl_kernel_launch' from the beginning of the function body. 
+ // Performing that lookup requires the stack of parsing scopes active + // when the definition is parsed and is thus done here; the result is + // cached in FunctionScopeInfo and used to synthesize the (possibly + // unresolved) call expression after the function body has been parsed. + const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>(); + if (!SKEPAttr->isInvalidAttr()) { + ExprResult LaunchIdExpr = + SYCL().BuildSYCLKernelLaunchIdExpr(FD, SKEPAttr->getKernelName()); + // Do not mark 'FD' as invalid if construction of `LaunchIDExpr` produces + // an invalid result. Name lookup failure for 'sycl_kernel_launch' is + // treated as an error in the definition of 'FD'; treating it as an error + // of the declaration would affect overload resolution which would + // potentially result in additional errors. If construction of + // 'LaunchIDExpr' failed, then 'SYCLKernelLaunchIdExpr' will be assigned + // a null pointer value below; that is expected. + getCurFunction()->SYCLKernelLaunchIdExpr = LaunchIdExpr.get(); + } + } + return D; } @@ -16561,12 +16587,37 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, bool IsInstantiation, SKEPAttr->setInvalidAttr(); } - if (Body && !FD->isTemplated() && !SKEPAttr->isInvalidAttr()) { - StmtResult SR = - SYCL().BuildSYCLKernelCallStmt(FD, cast<CompoundStmt>(Body)); - if (SR.isInvalid()) - return nullptr; - Body = SR.get(); + // Build an unresolved SYCL kernel call statement for a function template, + // validate that a SYCL kernel call statement was instantiated for an + // (implicit or explicit) instantiation of a function template, or otherwise + // build a (resolved) SYCL kernel call statement for a non-templated + // function or an explicit specialization. + if (Body && !SKEPAttr->isInvalidAttr()) { + StmtResult SR; + if (FD->isTemplateInstantiation()) { + // The function body should already be a SYCLKernelCallStmt in this + // case, but might not be if there were previous errors. 
+ SR = Body; + } else if (!getCurFunction()->SYCLKernelLaunchIdExpr) { + // If name lookup for a template named sycl_kernel_launch failed + // earlier, don't try to build a SYCL kernel call statement as that + // would cause additional errors to be issued; just proceed with the + // original function body. + SR = Body; + } else if (FD->isTemplated()) { + SR = SYCL().BuildUnresolvedSYCLKernelCallStmt( + cast<CompoundStmt>(Body), getCurFunction()->SYCLKernelLaunchIdExpr); + } else { + SR = SYCL().BuildSYCLKernelCallStmt( + FD, cast<CompoundStmt>(Body), + getCurFunction()->SYCLKernelLaunchIdExpr); + } + // If construction of the replacement body fails, just continue with the + // original function body. An early error return here is not valid; the + // current declaration context and function scopes must be popped before + // returning. + if (SR.isUsable()) + Body = SR.get(); } } @@ -21037,7 +21088,9 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, // SYCL functions can be template, so we check if they have appropriate // attribute prior to checking if it is a template. - if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>()) + if (LangOpts.SYCLIsDevice && (FD->hasAttr<SYCLKernelAttr>() || + FD->hasAttr<SYCLKernelEntryPointAttr>() || + FD->hasAttr<SYCLExternalAttr>())) return FunctionEmissionStatus::Emitted; // Templates are emitted when they're instantiated. 
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 8df01a8a616c..56079ea8e1bf 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -15,6 +15,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtSYCL.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" @@ -1250,6 +1251,18 @@ CanThrowResult Sema::canThrow(const Stmt *S) { return CT; } + case Stmt::SYCLKernelCallStmtClass: { + auto *SKCS = cast<SYCLKernelCallStmt>(S); + if (getLangOpts().SYCLIsDevice) + return canSubStmtsThrow(*this, + SKCS->getOutlinedFunctionDecl()->getBody()); + assert(getLangOpts().SYCLIsHost); + return canSubStmtsThrow(*this, SKCS->getKernelLaunchStmt()); + } + + case Stmt::UnresolvedSYCLKernelCallStmtClass: + return CT_Dependent; + // ObjC message sends are like function calls, but never have exception // specs. 
case Expr::ObjCMessageExprClass: @@ -1433,7 +1446,6 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::AttributedStmtClass: case Stmt::BreakStmtClass: case Stmt::CapturedStmtClass: - case Stmt::SYCLKernelCallStmtClass: case Stmt::CaseStmtClass: case Stmt::CompoundStmtClass: case Stmt::ContinueStmtClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 53d215f5c5e3..04b3b36aacf6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -406,6 +406,9 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs, targetDiag(*Locs.begin(), diag::err_thread_unsupported); } + if (LangOpts.SYCLIsDevice && isa<FunctionDecl>(D)) + SYCL().CheckDeviceUseOfDecl(D, Loc); + return false; } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index f3e672642816..5701b76427d6 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3326,6 +3326,121 @@ static bool CheckVectorElementCount(Sema *S, QualType PassedType, enum class SampleKind { Sample, Bias, Grad, Level, Cmp, CmpLevelZero }; +static bool CheckTextureSamplerAndLocation(Sema &S, CallExpr *TheCall) { + // Check the texture handle. + if (CheckResourceHandle(&S, TheCall, 0, + [](const HLSLAttributedResourceType *ResType) { + return ResType->getAttrs().ResourceDimension == + llvm::dxil::ResourceDimension::Unknown; + })) + return true; + + // Check the sampler handle. + if (CheckResourceHandle(&S, TheCall, 1, + [](const HLSLAttributedResourceType *ResType) { + return ResType->getAttrs().ResourceClass != + llvm::hlsl::ResourceClass::Sampler; + })) + return true; + + auto *ResourceTy = + TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>(); + + // Check the location. 
+ unsigned ExpectedDim = + getResourceDimensions(ResourceTy->getAttrs().ResourceDimension); + if (CheckVectorElementCount(&S, TheCall->getArg(2)->getType(), + S.Context.FloatTy, ExpectedDim, + TheCall->getBeginLoc())) + return true; + + return false; +} + +static bool CheckGatherBuiltin(Sema &S, CallExpr *TheCall, bool IsCmp) { + if (S.checkArgCountRange(TheCall, IsCmp ? 5 : 4, IsCmp ? 6 : 5)) + return true; + + if (CheckTextureSamplerAndLocation(S, TheCall)) + return true; + + unsigned NextIdx = 3; + if (IsCmp) { + // Check the compare value. + QualType CmpTy = TheCall->getArg(NextIdx)->getType(); + if (!CmpTy->isFloatingType() || CmpTy->isVectorType()) { + S.Diag(TheCall->getArg(NextIdx)->getBeginLoc(), + diag::err_typecheck_convert_incompatible) + << CmpTy << S.Context.FloatTy << 1 << 0 << 0; + return true; + } + NextIdx++; + } + + // Check the component operand. + Expr *ComponentArg = TheCall->getArg(NextIdx); + QualType ComponentTy = ComponentArg->getType(); + if (!ComponentTy->isIntegerType() || ComponentTy->isVectorType()) { + S.Diag(ComponentArg->getBeginLoc(), + diag::err_typecheck_convert_incompatible) + << ComponentTy << S.Context.UnsignedIntTy << 1 << 0 << 0; + return true; + } + + // GatherCmp operations on Vulkan target must use component 0 (Red). + if (IsCmp && S.getASTContext().getTargetInfo().getTriple().isSPIRV()) { + std::optional<llvm::APSInt> ComponentOpt = + ComponentArg->getIntegerConstantExpr(S.getASTContext()); + if (ComponentOpt) { + int64_t ComponentVal = ComponentOpt->getSExtValue(); + if (ComponentVal != 0) { + // Issue an error if the component is not 0 (Red). + // 0 -> Red, 1 -> Green, 2 -> Blue, 3 -> Alpha + assert(ComponentVal >= 0 && ComponentVal <= 3 && + "The component is not in the expected range."); + S.Diag(ComponentArg->getBeginLoc(), + diag::err_hlsl_gathercmp_invalid_component) + << ComponentVal; + return true; + } + } + } + + NextIdx++; + + // Check the offset operand. 
+ const HLSLAttributedResourceType *ResourceTy = + TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>(); + if (TheCall->getNumArgs() > NextIdx) { + unsigned ExpectedDim = + getResourceDimensions(ResourceTy->getAttrs().ResourceDimension); + if (CheckVectorElementCount(&S, TheCall->getArg(NextIdx)->getType(), + S.Context.IntTy, ExpectedDim, + TheCall->getArg(NextIdx)->getBeginLoc())) + return true; + NextIdx++; + } + + assert(ResourceTy->hasContainedType() && + "Expecting a contained type for resource with a dimension " + "attribute."); + QualType ReturnType = ResourceTy->getContainedType(); + + if (IsCmp) { + if (!ReturnType->hasFloatingRepresentation()) { + S.Diag(TheCall->getBeginLoc(), diag::err_hlsl_samplecmp_requires_float); + return true; + } + } + + if (const auto *VecTy = ReturnType->getAs<VectorType>()) + ReturnType = VecTy->getElementType(); + ReturnType = S.Context.getExtVectorType(ReturnType, 4); + + TheCall->setType(ReturnType); + + return false; +} static bool CheckSamplingBuiltin(Sema &S, CallExpr *TheCall, SampleKind Kind) { unsigned MinArgs, MaxArgs; if (Kind == SampleKind::Sample) { @@ -3352,32 +3467,13 @@ static bool CheckSamplingBuiltin(Sema &S, CallExpr *TheCall, SampleKind Kind) { if (S.checkArgCountRange(TheCall, MinArgs, MaxArgs)) return true; - // Check the texture handle. - if (CheckResourceHandle(&S, TheCall, 0, - [](const HLSLAttributedResourceType *ResType) { - return ResType->getAttrs().ResourceDimension == - llvm::dxil::ResourceDimension::Unknown; - })) - return true; - - // Check the sampler handle. - if (CheckResourceHandle(&S, TheCall, 1, - [](const HLSLAttributedResourceType *ResType) { - return ResType->getAttrs().ResourceClass != - llvm::hlsl::ResourceClass::Sampler; - })) + if (CheckTextureSamplerAndLocation(S, TheCall)) return true; - auto *ResourceTy = + const HLSLAttributedResourceType *ResourceTy = TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>(); - - // Check the location. 
unsigned ExpectedDim = getResourceDimensions(ResourceTy->getAttrs().ResourceDimension); - if (CheckVectorElementCount(&S, TheCall->getArg(2)->getType(), - S.Context.FloatTy, ExpectedDim, - TheCall->getBeginLoc())) - return true; unsigned NextIdx = 3; if (Kind == SampleKind::Bias || Kind == SampleKind::Level || @@ -3577,6 +3673,10 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return CheckSamplingBuiltin(SemaRef, TheCall, SampleKind::Cmp); case Builtin::BI__builtin_hlsl_resource_sample_cmp_level_zero: return CheckSamplingBuiltin(SemaRef, TheCall, SampleKind::CmpLevelZero); + case Builtin::BI__builtin_hlsl_resource_gather: + return CheckGatherBuiltin(SemaRef, TheCall, /*IsCmp=*/false); + case Builtin::BI__builtin_hlsl_resource_gather_cmp: + return CheckGatherBuiltin(SemaRef, TheCall, /*IsCmp=*/true); case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: { assert(TheCall->getNumArgs() == 1 && "expected 1 arg"); // Update return type to be the attributed resource type from arg0. 
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 280f9b1a4b42..ff8ad61aa3af 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -30,15 +30,25 @@ SemaSYCL::SemaSYCL(Sema &S) : SemaBase(S) {} Sema::SemaDiagnosticBuilder SemaSYCL::DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().SYCLIsDevice && - "Should only be called during SYCL compilation"); - FunctionDecl *FD = dyn_cast<FunctionDecl>(SemaRef.getCurLexicalContext()); - SemaDiagnosticBuilder::Kind DiagKind = [this, FD] { - if (!FD) - return SemaDiagnosticBuilder::K_Nop; - if (SemaRef.getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted) - return SemaDiagnosticBuilder::K_ImmediateWithCallStack; - return SemaDiagnosticBuilder::K_Deferred; - }(); + "Device diagnostics Should only be issued during device compilation"); + SemaDiagnosticBuilder::Kind DiagKind = SemaDiagnosticBuilder::K_Nop; + FunctionDecl *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); + if (FD) { + Sema::FunctionEmissionStatus FES = SemaRef.getEmissionStatus(FD); + switch (FES) { + case Sema::FunctionEmissionStatus::Emitted: + DiagKind = SemaDiagnosticBuilder::K_ImmediateWithCallStack; + break; + case Sema::FunctionEmissionStatus::Unknown: + case Sema::FunctionEmissionStatus::TemplateDiscarded: + DiagKind = SemaDiagnosticBuilder::K_Deferred; + break; + case Sema::FunctionEmissionStatus::OMPDiscarded: + llvm_unreachable("OMPDiscarded unexpected in SYCL device compilation"); + case Sema::FunctionEmissionStatus::CUDADiscarded: + llvm_unreachable("CUDADiscarded unexpected in SYCL device compilation"); + } + } return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, SemaRef); } @@ -211,6 +221,23 @@ void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) { SYCLKernelEntryPointAttr(SemaRef.Context, AL, TSI)); } +void SemaSYCL::CheckDeviceUseOfDecl(NamedDecl *ND, SourceLocation Loc) { + assert(getLangOpts().SYCLIsDevice && + "Should only be called 
during SYCL device compilation"); + + // Function declarations with the sycl_kernel_entry_point attribute cannot + // be ODR-used in a potentially evaluated context. + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) { + if (const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>()) { + if (SemaRef.currentEvaluationContext().isPotentiallyEvaluated()) { + DiagIfDeviceCode(Loc, diag::err_sycl_entry_point_device_use) + << FD << SKEPAttr; + DiagIfDeviceCode(SKEPAttr->getLocation(), diag::note_attribute) << FD; + } + } + } +} + // Given a potentially qualified type, SourceLocationForUserDeclaredType() // returns the source location of the canonical declaration of the unqualified // desugared user declared type, if any. For non-user declared types, an @@ -315,10 +342,20 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) { } } + if (isa<CXXConstructorDecl>(FD)) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << SKEPAttr << diag::InvalidSKEPReason::Constructor; + SKEPAttr->setInvalidAttr(); + } + if (isa<CXXDestructorDecl>(FD)) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << SKEPAttr << diag::InvalidSKEPReason::Destructor; + SKEPAttr->setInvalidAttr(); + } if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { - if (!MD->isStatic()) { + if (MD->isExplicitObjectMemberFunction()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << diag::InvalidSKEPReason::NonStaticMemberFn; + << SKEPAttr << diag::InvalidSKEPReason::ExplicitObjectFn; SKEPAttr->setInvalidAttr(); } } @@ -387,8 +424,165 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) { } } +ExprResult SemaSYCL::BuildSYCLKernelLaunchIdExpr(FunctionDecl *FD, + QualType KNT) { + // The current context must be the function definition context to ensure + // that name lookup is performed within the correct scope. 
+ assert(SemaRef.CurContext == FD && "The current declaration context does not " + "match the requested function context"); + + // An appropriate source location is required to emit diagnostics if + // lookup fails to produce an overload set. The desired location is the + // start of the function body, but that is not yet available since the + // body of the function has not yet been set when this function is called. + // The general location of the function is used instead. + SourceLocation Loc = FD->getLocation(); + + ASTContext &Ctx = SemaRef.getASTContext(); + IdentifierInfo &SYCLKernelLaunchID = + Ctx.Idents.get("sycl_kernel_launch", tok::TokenKind::identifier); + + // Establish a code synthesis context for the implicit name lookup of + // a template named 'sycl_kernel_launch'. In the event of an error, this + // ensures an appropriate diagnostic note is issued to explain why the + // lookup was performed. + Sema::CodeSynthesisContext CSC; + CSC.Kind = Sema::CodeSynthesisContext::SYCLKernelLaunchLookup; + CSC.Entity = FD; + Sema::ScopedCodeSynthesisContext ScopedCSC(SemaRef, CSC); + + // Perform ordinary name lookup for a function or variable template that + // accepts a single type template argument. 
+ LookupResult Result(SemaRef, &SYCLKernelLaunchID, Loc, + Sema::LookupOrdinaryName); + CXXScopeSpec EmptySS; + if (SemaRef.LookupTemplateName(Result, SemaRef.getCurScope(), EmptySS, + /*ObjectType*/ QualType(), + /*EnteringContext*/ false, + Sema::TemplateNameIsRequired)) + return ExprError(); + if (Result.isAmbiguous()) + return ExprError(); + + TemplateArgumentListInfo TALI{Loc, Loc}; + TemplateArgument KNTA = TemplateArgument(KNT); + TemplateArgumentLoc TAL = + SemaRef.getTrivialTemplateArgumentLoc(KNTA, QualType(), Loc); + TALI.addArgument(TAL); + + ExprResult IdExpr; + if (SemaRef.isPotentialImplicitMemberAccess(EmptySS, Result, + /*IsAddressOfOperand*/ false)) { + // The lookup result allows for a possible implicit member access that + // would require an implicit or explicit 'this' argument. + IdExpr = SemaRef.BuildPossibleImplicitMemberExpr( + EmptySS, SourceLocation(), Result, &TALI, SemaRef.getCurScope()); + } else { + IdExpr = SemaRef.BuildTemplateIdExpr(EmptySS, SourceLocation(), Result, + /*RequiresADL*/ true, &TALI); + } + + // The resulting expression may be invalid if, for example, 'FD' is a + // non-static member function and sycl_kernel_launch lookup selects a + // member function (which would require a 'this' argument which is + // not available). + if (IdExpr.isInvalid()) + return ExprError(); + + return IdExpr; +} + namespace { +// Constructs the arguments to be passed for the SYCL kernel launch call. +// The first argument is a string literal that contains the SYCL kernel +// name. The remaining arguments are the parameters of 'FD' passed as +// move-elligible xvalues. Returns true on error and false otherwise. +bool BuildSYCLKernelLaunchCallArgs(Sema &SemaRef, FunctionDecl *FD, + const SYCLKernelInfo *SKI, + SmallVectorImpl<Expr *> &Args, + SourceLocation Loc) { + // The current context must be the function definition context to ensure + // that parameter references occur within the correct scope. 
+ assert(SemaRef.CurContext == FD && "The current declaration context does not " + "match the requested function context"); + + // Prepare a string literal that contains the kernel name. + ASTContext &Ctx = SemaRef.getASTContext(); + const std::string &KernelName = SKI->GetKernelName(); + QualType KernelNameCharTy = Ctx.CharTy.withConst(); + llvm::APInt KernelNameSize(Ctx.getTypeSize(Ctx.getSizeType()), + KernelName.size() + 1); + QualType KernelNameArrayTy = Ctx.getConstantArrayType( + KernelNameCharTy, KernelNameSize, nullptr, ArraySizeModifier::Normal, 0); + Expr *KernelNameExpr = + StringLiteral::Create(Ctx, KernelName, StringLiteralKind::Ordinary, + /*Pascal*/ false, KernelNameArrayTy, Loc); + Args.push_back(KernelNameExpr); + + // Forward all parameters of 'FD' to the SYCL kernel launch function as if + // by std::move(). + for (ParmVarDecl *PVD : FD->parameters()) { + QualType ParamType = PVD->getOriginalType().getNonReferenceType(); + ExprResult E = SemaRef.BuildDeclRefExpr(PVD, ParamType, VK_LValue, Loc); + if (E.isInvalid()) + return true; + if (!PVD->getType()->isLValueReferenceType()) + E = ImplicitCastExpr::Create(SemaRef.Context, E.get()->getType(), CK_NoOp, + E.get(), nullptr, VK_XValue, + FPOptionsOverride()); + if (E.isInvalid()) + return true; + Args.push_back(E.get()); + } + + return false; +} + +// Constructs the SYCL kernel launch call. +StmtResult BuildSYCLKernelLaunchCallStmt(Sema &SemaRef, FunctionDecl *FD, + const SYCLKernelInfo *SKI, + Expr *IdExpr, SourceLocation Loc) { + SmallVector<Stmt *> Stmts; + // IdExpr may be null if name lookup failed. + if (IdExpr) { + llvm::SmallVector<Expr *, 12> Args; + + // Establish a code synthesis context for construction of the arguments + // for the implicit call to 'sycl_kernel_launch'. 
+ { + Sema::CodeSynthesisContext CSC; + CSC.Kind = Sema::CodeSynthesisContext::SYCLKernelLaunchLookup; + CSC.Entity = FD; + Sema::ScopedCodeSynthesisContext ScopedCSC(SemaRef, CSC); + + if (BuildSYCLKernelLaunchCallArgs(SemaRef, FD, SKI, Args, Loc)) + return StmtError(); + } + + // Establish a code synthesis context for the implicit call to + // 'sycl_kernel_launch'. + { + Sema::CodeSynthesisContext CSC; + CSC.Kind = Sema::CodeSynthesisContext::SYCLKernelLaunchOverloadResolution; + CSC.Entity = FD; + CSC.CallArgs = Args.data(); + CSC.NumCallArgs = Args.size(); + Sema::ScopedCodeSynthesisContext ScopedCSC(SemaRef, CSC); + + ExprResult LaunchResult = + SemaRef.BuildCallExpr(SemaRef.getCurScope(), IdExpr, Loc, Args, Loc); + if (LaunchResult.isInvalid()) + return StmtError(); + + Stmts.push_back(SemaRef.MaybeCreateExprWithCleanups(LaunchResult).get()); + } + } + + return CompoundStmt::Create(SemaRef.getASTContext(), Stmts, + FPOptionsOverride(), Loc, Loc); +} + // The body of a function declared with the [[sycl_kernel_entry_point]] // attribute is cloned and transformed to substitute references to the original // function parameters with references to replacement variables that stand in @@ -399,9 +593,10 @@ class OutlinedFunctionDeclBodyInstantiator public: using ParmDeclMap = llvm::DenseMap<ParmVarDecl *, VarDecl *>; - OutlinedFunctionDeclBodyInstantiator(Sema &S, ParmDeclMap &M) + OutlinedFunctionDeclBodyInstantiator(Sema &S, ParmDeclMap &M, + FunctionDecl *FD) : TreeTransform<OutlinedFunctionDeclBodyInstantiator>(S), SemaRef(S), - MapRef(M) {} + MapRef(M), FD(FD) {} // A new set of AST nodes is always required. bool AlwaysRebuild() { return true; } @@ -427,18 +622,62 @@ public: return DRE; } + // Diagnose CXXThisExpr in a potentially evaluated expression. 
+ ExprResult TransformCXXThisExpr(CXXThisExpr *CTE) { + if (SemaRef.currentEvaluationContext().isPotentiallyEvaluated()) { + SemaRef.Diag(CTE->getExprLoc(), diag::err_sycl_entry_point_invalid_this) + << (CTE->isImplicitCXXThis() ? /* implicit */ 1 : /* empty */ 0) + << FD->getAttr<SYCLKernelEntryPointAttr>(); + } + return CTE; + } + private: Sema &SemaRef; ParmDeclMap &MapRef; + FunctionDecl *FD; }; +OutlinedFunctionDecl *BuildSYCLKernelEntryPointOutline(Sema &SemaRef, + FunctionDecl *FD, + CompoundStmt *Body) { + using ParmDeclMap = OutlinedFunctionDeclBodyInstantiator::ParmDeclMap; + ParmDeclMap ParmMap; + + OutlinedFunctionDecl *OFD = OutlinedFunctionDecl::Create( + SemaRef.getASTContext(), FD, FD->getNumParams()); + unsigned i = 0; + for (ParmVarDecl *PVD : FD->parameters()) { + ImplicitParamDecl *IPD = ImplicitParamDecl::Create( + SemaRef.getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(), + PVD->getType(), ImplicitParamKind::Other); + OFD->setParam(i, IPD); + ParmMap[PVD] = IPD; + ++i; + } + + OutlinedFunctionDeclBodyInstantiator OFDBodyInstantiator(SemaRef, ParmMap, + FD); + Stmt *OFDBody = OFDBodyInstantiator.TransformStmt(Body).get(); + OFD->setBody(OFDBody); + OFD->setNothrow(); + + return OFD; +} + } // unnamed namespace StmtResult SemaSYCL::BuildSYCLKernelCallStmt(FunctionDecl *FD, - CompoundStmt *Body) { + CompoundStmt *Body, + Expr *LaunchIdExpr) { assert(!FD->isInvalidDecl()); assert(!FD->isTemplated()); assert(FD->hasPrototype()); + // The current context must be the function definition context to ensure + // that name lookup and parameter and local variable creation are performed + // within the correct scope. 
+ assert(SemaRef.CurContext == FD && "The current declaration context does not " + "match the requested function context"); const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>(); assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute"); @@ -451,29 +690,28 @@ StmtResult SemaSYCL::BuildSYCLKernelCallStmt(FunctionDecl *FD, getASTContext().getSYCLKernelInfo(SKEPAttr->getKernelName()); assert(declaresSameEntity(SKI.getKernelEntryPointDecl(), FD) && "SYCL kernel name conflict"); - (void)SKI; - using ParmDeclMap = OutlinedFunctionDeclBodyInstantiator::ParmDeclMap; - ParmDeclMap ParmMap; - - assert(SemaRef.CurContext == FD); + // Build the outline of the synthesized device entry point function. OutlinedFunctionDecl *OFD = - OutlinedFunctionDecl::Create(getASTContext(), FD, FD->getNumParams()); - unsigned i = 0; - for (ParmVarDecl *PVD : FD->parameters()) { - ImplicitParamDecl *IPD = ImplicitParamDecl::Create( - getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(), - PVD->getType(), ImplicitParamKind::Other); - OFD->setParam(i, IPD); - ParmMap[PVD] = IPD; - ++i; - } + BuildSYCLKernelEntryPointOutline(SemaRef, FD, Body); + assert(OFD); - OutlinedFunctionDeclBodyInstantiator OFDBodyInstantiator(SemaRef, ParmMap); - Stmt *OFDBody = OFDBodyInstantiator.TransformStmt(Body).get(); - OFD->setBody(OFDBody); - OFD->setNothrow(); - Stmt *NewBody = new (getASTContext()) SYCLKernelCallStmt(Body, OFD); + // Build the host kernel launch statement. An appropriate source location + // is required to emit diagnostics. 
+ SourceLocation Loc = Body->getLBracLoc(); + StmtResult LaunchResult = + BuildSYCLKernelLaunchCallStmt(SemaRef, FD, &SKI, LaunchIdExpr, Loc); + if (LaunchResult.isInvalid()) + return StmtError(); + + Stmt *NewBody = + new (getASTContext()) SYCLKernelCallStmt(Body, LaunchResult.get(), OFD); return NewBody; } + +StmtResult SemaSYCL::BuildUnresolvedSYCLKernelCallStmt(CompoundStmt *Body, + Expr *LaunchIdExpr) { + return UnresolvedSYCLKernelCallStmt::Create(SemaRef.getASTContext(), Body, + LaunchIdExpr); +} diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index b4d8158525f0..a60d11d8eb36 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===/ #include "TreeTransform.h" -#include "clang/AST/ASTConcept.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" @@ -593,6 +592,8 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const { case BuildingDeductionGuides: case TypeAliasTemplateInstantiation: case PartialOrderingTTP: + case SYCLKernelLaunchLookup: + case SYCLKernelLaunchOverloadResolution: return false; // This function should never be called when Kind's value is Memoization. @@ -898,6 +899,26 @@ static std::string convertCallArgsToString(Sema &S, return Result; } +static std::string +convertCallArgsValueCategoryAndTypeToString(Sema &S, + llvm::ArrayRef<const Expr *> Args) { + std::string Result; + llvm::raw_string_ostream OS(Result); + llvm::ListSeparator Comma; + OS << "("; + for (const Expr *Arg : Args) { + ExprValueKind EVK = Arg->getValueKind(); + const char *ValueCategory = + (EVK == VK_LValue ? "lvalue" + : (EVK == VK_XValue ? 
"xvalue" : "prvalue")); + OS << Comma << ValueCategory << " of type '"; + Arg->getType().print(OS, S.getPrintingPolicy()); + OS << "'"; + } + OS << ")"; + return Result; +} + void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) { // Determine which template instantiations to skip, if any. unsigned SkipStart = CodeSynthesisContexts.size(), SkipEnd = SkipStart; @@ -1260,6 +1281,33 @@ void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) { << /*isTemplateTemplateParam=*/true << Active->InstantiationRange); break; + case CodeSynthesisContext::SYCLKernelLaunchLookup: { + const auto *SKEPAttr = + Active->Entity->getAttr<SYCLKernelEntryPointAttr>(); + assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute"); + assert(!SKEPAttr->isInvalidAttr() && + "sycl_kernel_entry_point attribute is invalid"); + DiagFunc(SKEPAttr->getLocation(), PDiag(diag::note_sycl_runtime_defect)); + DiagFunc(SKEPAttr->getLocation(), + PDiag(diag::note_sycl_kernel_launch_lookup_here) + << SKEPAttr->getKernelName()); + break; + } + case CodeSynthesisContext::SYCLKernelLaunchOverloadResolution: { + const auto *SKEPAttr = + Active->Entity->getAttr<SYCLKernelEntryPointAttr>(); + assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute"); + assert(!SKEPAttr->isInvalidAttr() && + "sycl_kernel_entry_point attribute is invalid"); + DiagFunc(SKEPAttr->getLocation(), PDiag(diag::note_sycl_runtime_defect)); + DiagFunc(SKEPAttr->getLocation(), + PDiag(diag::note_sycl_kernel_launch_overload_resolution_here) + << SKEPAttr->getKernelName() + << convertCallArgsValueCategoryAndTypeToString( + *this, llvm::ArrayRef(Active->CallArgs, + Active->NumCallArgs))); + break; + } } } } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index a416c73c458b..b8442f8fdd9e 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -13077,6 +13077,31 @@ ExprResult TreeTransform<Derived>::TransformSYCLUniqueStableNameExpr( } 
template <typename Derived> +StmtResult TreeTransform<Derived>::TransformUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *S) { + auto *FD = cast<FunctionDecl>(SemaRef.CurContext); + const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>(); + if (!SKEPAttr || SKEPAttr->isInvalidAttr()) + return StmtError(); + + ExprResult IdExpr = getDerived().TransformExpr(S->getKernelLaunchIdExpr()); + if (IdExpr.isInvalid()) + return StmtError(); + + StmtResult Body = getDerived().TransformStmt(S->getOriginalStmt()); + if (Body.isInvalid()) + return StmtError(); + + StmtResult SR = SemaRef.SYCL().BuildSYCLKernelCallStmt( + cast<FunctionDecl>(SemaRef.CurContext), cast<CompoundStmt>(Body.get()), + IdExpr.get()); + if (SR.isInvalid()) + return StmtError(); + + return SR; +} + +template <typename Derived> ExprResult TreeTransform<Derived>::TransformCXXReflectExpr(CXXReflectExpr *E) { // TODO(reflection): Implement its transform assert(false && "not implemented yet"); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index a18fccb6518d..f351e185e5b5 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -543,6 +543,7 @@ void ASTStmtReader::VisitCXXReflectExpr(CXXReflectExpr *E) { void ASTStmtReader::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *S) { VisitStmt(S); S->setOriginalStmt(cast<CompoundStmt>(Record.readSubStmt())); + S->setKernelLaunchStmt(cast<Stmt>(Record.readSubStmt())); S->setOutlinedFunctionDecl(readDeclAs<OutlinedFunctionDecl>()); } @@ -608,6 +609,14 @@ void ASTStmtReader::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) { E->setTypeSourceInfo(Record.readTypeSourceInfo()); } +void ASTStmtReader::VisitUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *S) { + VisitStmt(S); + + S->setOriginalStmt(cast<CompoundStmt>(Record.readSubStmt())); + S->setKernelLaunchIdExpr(Record.readExpr()); +} + void 
ASTStmtReader::VisitPredefinedExpr(PredefinedExpr *E) { VisitExpr(E); bool HasFunctionName = Record.readInt(); @@ -3212,6 +3221,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = SYCLUniqueStableNameExpr::CreateEmpty(Context); break; + case STMT_UNRESOLVED_SYCL_KERNEL_CALL: + S = UnresolvedSYCLKernelCallStmt::CreateEmpty(Context); + break; + case EXPR_OPENACC_ASTERISK_SIZE: S = OpenACCAsteriskSizeExpr::CreateEmpty(Context); break; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 4fcac4d0261a..d9b95e53f2da 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -637,6 +637,7 @@ void ASTStmtWriter::VisitCapturedStmt(CapturedStmt *S) { void ASTStmtWriter::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *S) { VisitStmt(S); Record.AddStmt(S->getOriginalStmt()); + Record.AddStmt(S->getKernelLaunchStmt()); Record.AddDeclRef(S->getOutlinedFunctionDecl()); Code = serialization::STMT_SYCLKERNELCALL; @@ -695,6 +696,16 @@ void ASTStmtWriter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) { Code = serialization::EXPR_SYCL_UNIQUE_STABLE_NAME; } +void ASTStmtWriter::VisitUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *S) { + VisitStmt(S); + + Record.AddStmt(S->getOriginalStmt()); + Record.AddStmt(S->getKernelLaunchIdExpr()); + + Code = serialization::STMT_UNRESOLVED_SYCL_KERNEL_CALL; +} + void ASTStmtWriter::VisitPredefinedExpr(PredefinedExpr *E) { VisitExpr(E); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 644d57cc6b0d..bc8e9040444c 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1825,6 +1825,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTargetParallelGenericLoopDirectiveClass: case Stmt::CapturedStmtClass: case Stmt::SYCLKernelCallStmtClass: + case 
Stmt::UnresolvedSYCLKernelCallStmtClass: case Stmt::OpenACCComputeConstructClass: case Stmt::OpenACCLoopConstructClass: case Stmt::OpenACCCombinedConstructClass: diff --git a/clang/test/AST/HLSL/Texture2D-AST.hlsl b/clang/test/AST/HLSL/Texture2D-scalar-AST.hlsl index abdf0a8b35ab..8725bcc05882 100644 --- a/clang/test/AST/HLSL/Texture2D-AST.hlsl +++ b/clang/test/AST/HLSL/Texture2D-scalar-AST.hlsl @@ -415,7 +415,289 @@ // CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' // CHECK-NEXT: AlwaysInlineAttr -Texture2D<float4> t; +// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpRed 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpGreen 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpBlue 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpAlpha 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +Texture2D<float> t; SamplerState s; SamplerComparisonState scs; @@ -436,4 +718,5 @@ void main(float2 loc, float cmp) { t.SampleCmp(scs, loc, cmp, int2(1, 2), 1.0f); t.SampleCmpLevelZero(scs, loc, cmp); t.SampleCmpLevelZero(scs, loc, cmp, int2(1, 2)); + t.Gather(s, loc); } diff --git a/clang/test/AST/HLSL/Texture2D-vector-AST.hlsl b/clang/test/AST/HLSL/Texture2D-vector-AST.hlsl new file mode 100644 index 000000000000..4e1c41f05232 --- /dev/null +++ b/clang/test/AST/HLSL/Texture2D-vector-AST.hlsl @@ -0,0 +1,726 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -disable-llvm-passes -finclude-default-header -o - %s | FileCheck %s + +// CHECK: CXXRecordDecl {{.*}} SamplerState definition +// CHECK: FinalAttr {{.*}} Implicit final +// CHECK-NEXT: FieldDecl {{.*}} implicit {{.*}} __handle '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] + +// CHECK: CXXRecordDecl {{.*}} SamplerComparisonState definition +// CHECK: FinalAttr {{.*}} Implicit final +// CHECK-NEXT: FieldDecl {{.*}} implicit {{.*}} __handle '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] + +// CHECK: ClassTemplateDecl {{.*}} Texture2D +// CHECK: TemplateTypeParmDecl {{.*}} element_type +// CHECK: CXXRecordDecl {{.*}} Texture2D 
+// CHECK: FinalAttr {{.*}} Implicit final +// CHECK: ClassTemplatePartialSpecializationDecl {{.*}} Texture2D definition explicit_specialization +// CHECK: TemplateArgument type 'vector<element_type, element_count>':'vector<type-parameter-0-0, element_count>' +// CHECK: TemplateTypeParmDecl {{.*}} element_type +// CHECK: NonTypeTemplateParmDecl {{.*}} element_count +// CHECK-NEXT: FieldDecl {{.*}} implicit __handle '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] + +// CHECK: CXXMethodDecl {{.*}} Sample 'vector<element_type (hlsl::SamplerState, vector<float, 2>), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} Sample 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} Sample 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>, float), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleBias 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Bias 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_bias' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Bias' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleBias 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float, vector<int, 2>), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Bias 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_bias' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Bias' 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleBias 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float, vector<int, 2>, float), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Bias 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_bias' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Bias' 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleGrad 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<float, 2>, vector<float, 2>), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} DDX 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} DDY 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_grad' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDX' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDY' 'vector<float, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleGrad 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<float, 2>, vector<float, 2>, vector<int, 2>), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} DDX 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} DDY 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_grad' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDX' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDY' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleGrad 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<float, 2>, vector<float, 2>, vector<int, 2>, float), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} DDX 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} DDY 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_grad' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDX' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDY' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleLevel 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} LOD 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_level' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'LOD' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleLevel 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float, vector<int, 2>), element_count>' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} LOD 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_level' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'LOD' 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleCmp 'float (hlsl::SamplerComparisonState, vector<float, 2>, float)' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleCmp 'float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>)' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleCmp 'float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>, float)' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleCmpLevelZero 'float (hlsl::SamplerComparisonState, vector<float, 2>, float)' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp_level_zero' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} SampleCmpLevelZero 'float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>)' +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp_level_zero' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]] +// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]] +// CHECK-SAME: ' lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpRed 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpGreen 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpBlue 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2 +// CHECK-NEXT: AlwaysInlineAttr + +// CHECK: CXXMethodDecl {{.*}} GatherCmpAlpha 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline +// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState' +// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>' +// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent> +// CHECK-NEXT: CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) 
noexcept' +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this +// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle +// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3 +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>' +// CHECK-NEXT: AlwaysInlineAttr + +Texture2D<float4> t; +SamplerState s; +SamplerComparisonState scs; + +void main(float2 loc, float cmp) { + t.Sample(s, loc); + t.Sample(s, loc, int2(1, 2)); + t.Sample(s, loc, int2(1, 2), 1.0); + t.SampleBias(s, loc, 0.0); + t.SampleBias(s, loc, 0.0, int2(1, 2)); + t.SampleBias(s, loc, 0.0, int2(1, 2), 1.0); + t.SampleGrad(s, loc, float2(0,0), float2(0,0)); + t.SampleGrad(s, loc, float2(0,0), float2(0,0), int2(1, 2)); + t.SampleGrad(s, loc, float2(0,0), float2(0,0), int2(1, 2), 1.0); + t.SampleLevel(s, loc, 0.0); + t.SampleLevel(s, loc, 0.0, int2(1, 2)); + t.SampleCmp(scs, loc, cmp); + t.SampleCmp(scs, loc, cmp, int2(1, 2)); + t.SampleCmp(scs, loc, cmp, int2(1, 2), 1.0f); + t.SampleCmpLevelZero(scs, loc, cmp); + t.SampleCmpLevelZero(scs, loc, cmp, int2(1, 2)); + t.Gather(s, loc); +} diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp index e3ff3dea1951..c5518d903844 100644 --- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp +++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp @@ -34,6 +34,8 @@ template<int> struct K { void operator()(Ts...) const {} }; +template<typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) 
{} [[clang::sycl_kernel_entry_point(KN<1>)]] void skep1() { @@ -41,6 +43,12 @@ void skep1() { // CHECK: |-FunctionDecl {{.*}} skep1 'void ()' // CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}} // CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *)' lvalue Function {{.*}} 'sycl_kernel_launch' {{.*}} +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi1EE" // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<1> @@ -57,9 +65,10 @@ void skep2<KN<2>>(K<2>); // CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KT // CHECK-NEXT: | |-FunctionDecl {{.*}} skep2 'void (KT)' // CHECK-NEXT: | | |-ParmVarDecl {{.*}} k 'KT' -// CHECK-NEXT: | | |-CompoundStmt {{.*}} -// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' +// CHECK-NEXT: | | |-UnresolvedSYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' // CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} KNT // CHECK-NEXT: | `-FunctionDecl {{.*}} skep2 'void (K<2>)' explicit_instantiation_definition instantiated_from 0x{{.+}} @@ -77,6 +86,15 @@ void skep2<KN<2>>(K<2>); // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<2>' lvalue <NoOp> // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<2>' lvalue ParmVar {{.*}} 'k' 'K<2>' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// 
CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<2>)' lvalue Function {{.*}} 'sycl_kernel_launch' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi2EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'K<2>' 'void (K<2> &&) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'K<2>' xvalue <NoOp> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<2>' lvalue ParmVar {{.*}} 'k' 'K<2>' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<2>' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -102,9 +120,10 @@ void skep3<KN<3>>(K<3> k) { // CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KT // CHECK-NEXT: | |-FunctionDecl {{.*}} skep3 'void (KT)' // CHECK-NEXT: | | |-ParmVarDecl {{.*}} k 'KT' -// CHECK-NEXT: | | |-CompoundStmt {{.*}} -// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' +// CHECK-NEXT: | | |-UnresolvedSYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' // CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} KNT // CHECK-NEXT: | `-Function {{.*}} 'skep3' 'void (K<3>)' @@ -123,6 +142,15 @@ void skep3<KN<3>>(K<3> k) { // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<3>' lvalue <NoOp> // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<3>' lvalue ParmVar {{.*}} 'k' 'K<3>' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void 
(*)(const char *, K<3>)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<3>)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, K<3>)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi3EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'K<3>' 'void (K<3> &&) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'K<3>' xvalue <NoOp> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<3>' lvalue ParmVar {{.*}} 'k' 'K<3>' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<3>' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -152,6 +180,21 @@ void skep4(K<4> k, int p1, int p2) { // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p1' 'int' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p2' 'int' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, K<4>, int, int)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<4>, int, int)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, K<4>, int, int)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi4EE" +// CHECK-NEXT: | | | |-CXXConstructExpr {{.*}} 'K<4>' 'void (K<4> &&) noexcept' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'K<4>' xvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'K<4>' lvalue ParmVar {{.*}} 'k' 'K<4>' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: 
| | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p1' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p2' 'int' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<4>' // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used p1 'int' @@ -182,7 +225,28 @@ void skep5(int unused1, K<5> k, int unused2, int p, int unused3) { // CHECK-NEXT: | |-ParmVarDecl {{.*}} unused3 'int' // CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}} // CHECK-NEXT: | | |-CompoundStmt {{.*}} -// CHECK: | | `-OutlinedFunctionDecl {{.*}} +// CHECK: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, int, K<5>, int, int, int)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, int, K<5>, int, int, int)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, int, K<5>, int, int, int)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi5EE" +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused1' 'int' +// CHECK-NEXT: | | | |-CXXConstructExpr {{.*}} 'K<5>' 'void (K<5> &&) noexcept' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'K<5>' xvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'K<5>' lvalue ParmVar {{.*}} 'k' 'K<5>' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 
'unused2' 'int' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused3' 'int' +// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit unused1 'int' // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<5>' // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit unused2 'int' @@ -227,6 +291,14 @@ void skep6(const S6 &k) { // CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)() const' <FunctionToPointerDecay> // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S6)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S6)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, S6)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi6EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S6' 'void (const S6 &) noexcept' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'const S6 &' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -260,6 +332,15 @@ void skep7(S7 k) { // CHECK-NEXT: | | | | `-DeclRefExpr 
{{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const S7' lvalue <NoOp> // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S7' lvalue ParmVar {{.*}} 'k' 'S7' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S7)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S7)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, S7)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi7EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S7' 'void (S7 &&) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'S7' xvalue <NoOp> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S7' lvalue ParmVar {{.*}} 'k' 'S7' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'S7' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -270,6 +351,114 @@ void skep7(S7 k) { // CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'S7' lvalue ImplicitParam {{.*}} 'k' 'S7' // CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<7> +// Symbol names generated for the kernel entry point function should be +// representable in the ordinary literal encoding even when the kernel name +// type is named with esoteric characters. 
+struct \u03b4\u03c4\u03c7; // Delta Tau Chi (δτχ) +struct S8 { + void operator()() const; +}; +[[clang::sycl_kernel_entry_point(\u03b4\u03c4\u03c7)]] +void skep8(S8 k) { + k(); +} +// CHECK: |-FunctionDecl {{.*}} skep8 'void (S8)' +// CHECK-NEXT: | |-ParmVarDecl {{.*}} used k 'S8' +// CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S8)' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S8)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, S8)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[12]' lvalue "_ZTS6\316\264\317\204\317\207" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S8' 'void (S8 &&) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'S8' xvalue <NoOp> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S8' lvalue ParmVar {{.*}} 'k' 'S8' +// CHECK: | | `-OutlinedFunctionDecl {{.*}} +// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} + +class Handler { + template <typename KNT, typename... Ts> + void sycl_kernel_launch(const char *, Ts...) 
{} +public: + template<typename KNT, typename KT> + [[clang::sycl_kernel_entry_point(KNT)]] + void skep9(KT k, int a, int b) { + k(a, b); + } +}; +void foo() { + Handler H; + H.skep9<KN<9>>([=] (int a, int b) { return a+b; }, 1, 2); +} + +// CHECK: | |-FunctionTemplateDecl {{.*}} skep9 +// CHECK-NEXT: | | |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 KNT +// CHECK-NEXT: | | |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 1 KT +// CHECK-NEXT: | | |-CXXMethodDecl {{.*}} skep9 'void (KT, int, int)' implicit-inline +// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced k 'KT' +// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced a 'int' +// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced b 'int' +// CHECK-NEXT: | | | |-UnresolvedSYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | | `-CallExpr {{.*}} '<dependent type>' +// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' +// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int' +// CHECK-NEXT: | | | `-SYCLKernelEntryPointAttr {{.*}} KNT +// CHECK-NEXT: | | `-CXXMethodDecl {{.*}} used skep9 {{.*}} implicit_instantiation implicit-inline instantiated_from 0x{{.*}} +// CHECK-NEXT: | | |-TemplateArgument type 'KN<9>' +// CHECK-NEXT: | | | `-RecordType {{.*}} 'KN<9>' canonical +// CHECK-NEXT: | | | `-ClassTemplateSpecialization {{.*}}'KN' +// CHECK-NEXT: | | |-TemplateArgument type {{.*}} +// CHECK-NEXT: | | | `-RecordType {{.*}} +// CHECK-NEXT: | | | `-CXXRecord {{.*}} +// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used k {{.*}} +// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used a 'int' +// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used b 'int' +// CHECK-NEXT: | | |-SYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | | `-CXXOperatorCallExpr {{.*}} 'int' '()' +// CHECK-NEXT: | | | | 
|-ImplicitCastExpr {{.*}} 'int (*)(int, int) const' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int (int, int) const' lvalue CXXMethod {{.*}} 'operator()' 'int (int, int) const' +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} lvalue <NoOp> +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} lvalue ParmVar {{.*}} 'k' {{.*}} +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int' +// CHECK-NEXT: | | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | | `-CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: | | | | |-MemberExpr {{.*}} '<bound member function type>' ->sycl_kernel_launch {{.*}} +// CHECK-NEXT: | | | | | `-CXXThisExpr {{.*}} 'Handler *' implicit this +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay> +// CHECK-NEXT: | | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi9EE" +// CHECK-NEXT: | | | | |-CXXConstructExpr {{.*}} +// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} xvalue <NoOp> +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} lvalue ParmVar {{.*}} 'k' {{.*}} +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int' +// CHECK-NEXT: | | | `-OutlinedFunctionDecl {{.*}} +// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used k {{.*}} +// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used a 'int' +// CHECK-NEXT: | | | 
|-ImplicitParamDecl {{.*}} implicit used b 'int' +// CHECK-NEXT: | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CXXOperatorCallExpr {{.*}} 'int' '()' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int (*)(int, int) const' <FunctionToPointerDecay> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int (int, int) const' lvalue CXXMethod {{.*}} 'operator()' 'int (int, int) const' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} lvalue <NoOp> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} lvalue ImplicitParam {{.*}} 'k' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ImplicitParam {{.*}} 'a' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue> +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ImplicitParam {{.*}} 'b' 'int' +// CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} struct KN<9> + void the_end() {} // CHECK: `-FunctionDecl {{.*}} the_end 'void ()' diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp index 0171f72df0b3..011f48e91c29 100644 --- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp +++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp @@ -28,6 +28,9 @@ // A unique kernel name type is required for each declared kernel entry point. template<int, int=0> struct KN; +template<typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts... 
Args) {} + [[clang::sycl_kernel_entry_point(KN<1>)]] void skep1() { } diff --git a/clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp b/clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp new file mode 100644 index 000000000000..5adaa367ed9c --- /dev/null +++ b/clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fsycl-is-host -ast-print %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -ast-print %s -o - | FileCheck %s + +struct sycl_kernel_launcher { + template<typename KernelName, typename... Ts> + void sycl_kernel_launch(const char *, Ts...) {} + + template<typename KernelName, typename KernelType> + [[clang::sycl_kernel_entry_point(KernelName)]] + void sycl_kernel_entry_point(KernelType kernel) { + kernel(); + } +}; +// CHECK: template <typename KernelName, typename KernelType> void sycl_kernel_entry_point(KernelType kernel) +// CHECK-NEXT: { +// CHECK-NEXT: kernel(); +// CHECK-NEXT: } +// CHECK: template<> void sycl_kernel_entry_point<KN, (lambda at {{.*}})>((lambda at {{.*}}) kernel) +// CHECK-NEXT: { +// CHECK-NEXT: kernel(); +// CHECK-NEXT: } + +void f(sycl_kernel_launcher skl) { + skl.sycl_kernel_entry_point<struct KN>([]{}); +} diff --git a/clang/test/CIR/CodeGen/new-delete.cpp b/clang/test/CIR/CodeGen/new-delete.cpp new file mode 100644 index 000000000000..58db8f8646f4 --- /dev/null +++ b/clang/test/CIR/CodeGen/new-delete.cpp @@ -0,0 +1,164 @@ +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -triple=x86_64-linux-gnu -fclangir -emit-cir -std=c++98 -fcxx-exceptions -fexceptions -o %t.cir +// RUN: FileCheck -check-prefixes=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -triple=x86_64-linux-gnu -fclangir -emit-llvm -std=c++98 -fcxx-exceptions -fexceptions -o %t-cir.ll +// RUN: FileCheck -check-prefixes=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -triple=x86_64-linux-gnu -emit-llvm -std=c++98 -fcxx-exceptions -fexceptions -o %t.ll +// RUN: 
FileCheck -check-prefixes=OGCG --input-file=%t.ll %s + + +struct A { A(int); ~A(); void *p; }; + +A *a() { + return new A(5); +} + +// CIR: cir.func {{.*}} @_Z1av() -> !cir.ptr<!rec_A> { +// CIR: %[[RETVAL:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__retval"] +// CIR: %[[NEW_RESULT:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__new_result"] +// CIR: %[[ALLOC_SIZE:.*]] = cir.const #cir.int<8> : !u64i +// CIR: %[[PTR:.*]] = cir.call @_Znwm(%[[ALLOC_SIZE]]) +// CIR: cir.cleanup.scope { +// CIR: %[[PTR_A:.*]] = cir.cast bitcast %[[PTR]] : !cir.ptr<!void> -> !cir.ptr<!rec_A> +// CIR: cir.store{{.*}} %[[PTR_A]], %[[NEW_RESULT]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>> +// CIR: %[[FIVE:.*]] = cir.const #cir.int<5> : !s32i +// CIR: cir.call @_ZN1AC1Ei(%[[PTR_A]], %[[FIVE]]) +// CIR: cir.yield +// CIR: } cleanup eh { +// CIR: cir.call @_ZdlPv(%[[PTR]]) nothrow : (!cir.ptr<!void>) -> () +// CIR: cir.yield +// CIR: } + +// LLVM: define {{.*}} ptr @_Z1av() {{.*}} personality ptr @__gxx_personality_v0 { +// LLVM: %[[RETVAL:.*]] = alloca ptr +// LLVM: %[[NEW_RESULT:.*]] = alloca ptr +// LLVM: %[[PTR:.*]] = call ptr @_Znwm(i64 8) +// LLVM: br label %[[EH_SCOPE:.*]] +// LLVM: [[EH_SCOPE]]: +// LLVM: store ptr %[[PTR]], ptr %[[NEW_RESULT]] +// LLVM: invoke void @_ZN1AC1Ei(ptr %[[PTR]], i32 5) +// LLVM: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]] +// LLVM: [[INVOKE_CONT]]: +// LLVM: br label %[[EH_SCOPE_END:.*]] +// LLVM: [[UNWIND]]: +// LLVM: %[[EXN:.*]] = landingpad { ptr, i32 } +// LLVM: cleanup +// LLVM: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0 +// LLVM: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1 +// LLVM: br label %[[EH_CLEANUP:.*]] +// LLVM: [[EH_CLEANUP]]: +// LLVM: %[[EXN_PTR_PHI:.*]] = phi ptr [ %[[EXN_PTR]], %[[UNWIND]] ] +// LLVM: %[[TYPEID_PHI:.*]] = phi i32 [ %[[TYPEID]], %[[UNWIND]] ] +// LLVM: call void @_ZdlPv(ptr %[[PTR]]) +// LLVM: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } 
poison, ptr %[[EXN_PTR_PHI]], 0 +// LLVM: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[TYPEID_PHI]], 1 +// LLVM: resume { ptr, i32 } %[[EXN_INSERT_2]] +// LLVM: [[EH_SCOPE_END]]: +// LLVM: %[[LOAD:.*]] = load ptr, ptr %[[NEW_RESULT]] +// LLVM: store ptr %[[LOAD]], ptr %[[RETVAL]] +// LLVM: %[[RET:.*]] = load ptr, ptr %[[RETVAL]] +// LLVM: ret ptr %[[RET]] + +// OGCG: define {{.*}} ptr @_Z1av() {{.*}} personality ptr @__gxx_personality_v0 { +// OGCG: %[[EXN_SLOT:.*]] = alloca ptr +// OGCG: %[[EHSELECTOR_SLOT:.*]] = alloca i32 +// OGCG: %[[PTR:.*]] = call {{.*}} ptr @_Znwm(i64 8) +// OGCG: invoke void @_ZN1AC1Ei(ptr {{.*}} %[[PTR]], i32 5) +// OGCG: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]] +// OGCG: [[INVOKE_CONT]]: +// OGCG: ret ptr %[[PTR]] +// OGCG: [[UNWIND]]: +// OGCG: %[[EXN:.*]] = landingpad { ptr, i32 } +// OGCG: cleanup +// OGCG: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0 +// OGCG: store ptr %[[EXN_PTR]], ptr %[[EXN_SLOT]] +// OGCG: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1 +// OGCG: store i32 %[[TYPEID]], ptr %[[EHSELECTOR_SLOT]] +// OGCG: call void @_ZdlPv(ptr %[[PTR]]) +// OGCG: br label %[[EH_RESUME:.*]] +// OGCG: [[EH_RESUME]]: +// OGCG: %[[EXN_PTR:.*]] = load ptr, ptr %[[EXN_SLOT]] +// OGCG: %[[EHSELECTOR:.*]] = load i32, ptr %[[EHSELECTOR_SLOT]] +// OGCG: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR]], 0 +// OGCG: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[EHSELECTOR]], 1 +// OGCG: resume { ptr, i32 } %[[EXN_INSERT_2]] + +A *b() { + extern int foo(); + return new A(foo()); +} + +// CIR: cir.func {{.*}} @_Z1bv() -> !cir.ptr<!rec_A> { +// CIR: %[[RETVAL:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__retval"] +// CIR: %[[NEW_RESULT:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__new_result"] +// CIR: %[[ALLOC_SIZE:.*]] = cir.const #cir.int<8> : !u64i +// CIR: %[[PTR:.*]] = cir.call 
@_Znwm(%[[ALLOC_SIZE]]) +// CIR: cir.cleanup.scope { +// CIR: %[[PTR_A:.*]] = cir.cast bitcast %[[PTR]] : !cir.ptr<!void> -> !cir.ptr<!rec_A> +// CIR: cir.store{{.*}} %[[PTR_A]], %[[NEW_RESULT]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>> +// CIR: %[[FOO:.*]] = cir.call @_Z3foov() : () -> !s32i +// CIR: cir.call @_ZN1AC1Ei(%[[PTR_A]], %[[FOO]]) +// CIR: cir.yield +// CIR: } cleanup eh { +// CIR: cir.call @_ZdlPv(%[[PTR]]) nothrow : (!cir.ptr<!void>) -> () +// CIR: cir.yield +// CIR: } + +// LLVM: define {{.*}} ptr @_Z1bv() {{.*}} personality ptr @__gxx_personality_v0 { +// LLVM: %[[RETVAL:.*]] = alloca ptr +// LLVM: %[[NEW_RESULT:.*]] = alloca ptr +// LLVM: %[[PTR:.*]] = call ptr @_Znwm(i64 8) +// LLVM: br label %[[EH_SCOPE:.*]] +// LLVM: [[EH_SCOPE]]: +// LLVM: store ptr %[[PTR]], ptr %[[NEW_RESULT]] +// LLVM: %[[FOO:.*]] = invoke i32 @_Z3foov() +// LLVM: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]] +// LLVM: [[INVOKE_CONT]]: +// LLVM: invoke void @_ZN1AC1Ei(ptr %[[PTR]], i32 %[[FOO]]) +// LLVM: to label %[[INVOKE_CONT_2:.*]] unwind label %[[UNWIND:.*]] +// LLVM: [[INVOKE_CONT_2]]: +// LLVM: br label %[[EH_SCOPE_END:.*]] +// LLVM: [[UNWIND]]: +// LLVM: %[[EXN:.*]] = landingpad { ptr, i32 } +// LLVM: cleanup +// LLVM: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0 +// LLVM: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1 +// LLVM: br label %[[EH_CLEANUP:.*]] +// LLVM: [[EH_CLEANUP]]: +// LLVM: %[[EXN_PTR_PHI:.*]] = phi ptr [ %[[EXN_PTR]], %[[UNWIND]] ] +// LLVM: %[[TYPEID_PHI:.*]] = phi i32 [ %[[TYPEID]], %[[UNWIND]] ] +// LLVM: call void @_ZdlPv(ptr %[[PTR]]) +// LLVM: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR_PHI]], 0 +// LLVM: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[TYPEID_PHI]], 1 +// LLVM: resume { ptr, i32 } %[[EXN_INSERT_2]] +// LLVM: [[EH_SCOPE_END]]: +// LLVM: %[[LOAD:.*]] = load ptr, ptr %[[NEW_RESULT]] +// LLVM: store ptr %[[LOAD]], ptr %[[RETVAL]] +// LLVM: 
%[[RET:.*]] = load ptr, ptr %[[RETVAL]] +// LLVM: ret ptr %[[RET]] + +// OGCG: define {{.*}} ptr @_Z1bv() {{.*}} personality ptr @__gxx_personality_v0 { +// OGCG: %[[EXN_SLOT:.*]] = alloca ptr +// OGCG: %[[EHSELECTOR_SLOT:.*]] = alloca i32 +// OGCG: %[[PTR:.*]] = call {{.*}} ptr @_Znwm(i64 8) +// OGCG: %[[FOO:.*]] = invoke i32 @_Z3foov() +// OGCG: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]] +// OGCG: [[INVOKE_CONT]]: +// OGCG: invoke void @_ZN1AC1Ei(ptr {{.*}} %[[PTR]], i32 %[[FOO]]) +// OGCG: to label %[[INVOKE_CONT_2:.*]] unwind label %[[UNWIND:.*]] +// OGCG: [[INVOKE_CONT_2]]: +// OGCG: ret ptr %[[PTR]] +// OGCG: [[UNWIND]]: +// OGCG: %[[EXN:.*]] = landingpad { ptr, i32 } +// OGCG: cleanup +// OGCG: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0 +// OGCG: store ptr %[[EXN_PTR]], ptr %[[EXN_SLOT]] +// OGCG: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1 +// OGCG: store i32 %[[TYPEID]], ptr %[[EHSELECTOR_SLOT]] +// OGCG: call void @_ZdlPv(ptr %[[PTR]]) +// OGCG: br label %[[EH_RESUME:.*]] +// OGCG: [[EH_RESUME]]: +// OGCG: %[[EXN_PTR:.*]] = load ptr, ptr %[[EXN_SLOT]] +// OGCG: %[[EHSELECTOR:.*]] = load i32, ptr %[[EHSELECTOR_SLOT]] +// OGCG: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR]], 0 +// OGCG: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[EHSELECTOR]], 1 +// OGCG: resume { ptr, i32 } %[[EXN_INSERT_2]] diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c index 6eadaaf27a21..ac2c83aa03cc 100644 --- a/clang/test/CodeGen/AArch64/neon-misc.c +++ b/clang/test/CodeGen/AArch64/neon-misc.c @@ -7,313 +7,8 @@ #include <arm_neon.h> -// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> -// CHECK-NEXT: ret <8 x i8> 
[[VCEQZ_I]] -// -uint8x8_t test_vceqz_s8(int8x8_t a) { - return vceqz_s8(a); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16> -// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]] -// -uint16x4_t test_vceqz_s16(int16x4_t a) { - return vceqz_s16(a); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]] -// -uint32x2_t test_vceqz_s32(int32x2_t a) { - return vceqz_s32(a); -} - -// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_s64( -// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> -// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]] -// -uint64x1_t test_vceqz_s64(int64x1_t a) { - return vceqz_s64(a); -} - -// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_u64( -// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to 
<1 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> -// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]] -// -uint64x1_t test_vceqz_u64(uint64x1_t a) { - return vceqz_u64(a); -} - -// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_p64( -// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> -// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]] -// -uint64x1_t test_vceqz_p64(poly64x1_t a) { - return vceqz_p64(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> -// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]] -// -uint8x16_t test_vceqzq_s8(int8x16_t a) { - return vceqzq_s8(a); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]] -// -uint16x8_t test_vceqzq_s16(int16x8_t a) { - return vceqzq_s16(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> 
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]] -// -uint32x4_t test_vceqzq_s32(int32x4_t a) { - return vceqzq_s32(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_s64( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]] -// -uint64x2_t test_vceqzq_s64(int64x2_t a) { - return vceqzq_s64(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> -// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]] -// -uint8x8_t test_vceqz_u8(uint8x8_t a) { - return vceqz_u8(a); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16> -// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]] -// -uint16x4_t test_vceqz_u16(uint16x4_t a) { - return vceqz_u16(a); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]] -// -uint32x2_t test_vceqz_u32(uint32x2_t a) { - return vceqz_u32(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> -// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]] -// -uint8x16_t test_vceqzq_u8(uint8x16_t a) { - return vceqzq_u8(a); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]] -// -uint16x8_t test_vceqzq_u16(uint16x8_t a) { - return vceqzq_u16(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]] -// -uint32x4_t test_vceqzq_u32(uint32x4_t a) { - return vceqzq_u32(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_u64( -// CHECK-SAME: <2 x i64> noundef 
[[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]] -// -uint64x2_t test_vceqzq_u64(uint64x2_t a) { - return vceqzq_u64(a); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]] -// -uint32x2_t test_vceqz_f32(float32x2_t a) { - return vceqz_f32(a); -} - -// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_f64( -// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 -// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> -// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]] -// -uint64x1_t test_vceqz_f64(float64x1_t a) { - return vceqz_f64(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_f32( -// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]] -// -uint32x4_t test_vceqzq_f32(float32x4_t a) { - return vceqzq_f32(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_p8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> -// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]] -// -uint8x8_t test_vceqz_p8(poly8x8_t a) { - return vceqz_p8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_p8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> -// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]] -// -uint8x16_t test_vceqzq_p8(poly8x16_t a) { - return vceqzq_p8(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_f64( -// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]] -// -uint64x2_t test_vceqzq_f64(float64x2_t a) { - return vceqzq_f64(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> 
@test_vceqzq_p64( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]] -// -uint64x2_t test_vceqzq_p64(poly64x2_t a) { - return vceqzq_p64(a); -} - // CHECK-LABEL: define dso_local <8 x i8> @test_vcgez_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = icmp sge <8 x i8> [[A]], zeroinitializer // CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c index ab424fc08f17..77b6c09de857 100644 --- a/clang/test/CodeGen/AArch64/neon/fullfp16.c +++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c @@ -17,10 +17,6 @@ // hence for CIR we use `opt -passes=simplifycfg` to reduce the control flow // and to make LLVM IR match for all paths. // -// Minor differences between RUN lines (e.g., the presence of `noundef` on -// arguments or the `align` attribute on pointers) are matched using -// catch-alls such as `{{.*}}`. -// // TODO: Remove `-simplifycfg` once CIR lowering includes the relevant // optimizations to reduce the CFG. // diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index 2d476ad3028b..a711245b3372 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -7,9 +7,8 @@ //============================================================================= // NOTES // -// Minor differences between RUNs (e.g. 
presence of `noundef` attached to -// argumens, `align` attribute attached to pointers), are matched using -// catch-alls like {{.*}}. +// ACLE section headings based on v2025Q2 of the ACLE specification: +// * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#bitwise-equal-to-zero // // Different labels for CIR stem from an additional function call that is // present at the AST and CIR levels, but is inlined at the LLVM IR level. @@ -17,6 +16,20 @@ #include <arm_neon.h> +// LLVM-LABEL: @test_vnegd_s64 +// CIR-LABEL: @vnegd_s64 +int64_t test_vnegd_s64(int64_t a) { +// CIR: cir.unary(minus, {{.*}}) : !s64 + +// LLVM-SAME: i64 {{.*}} [[A:%.*]]) +// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]] +// LLVM-NEXT: ret i64 [[VNEGD_I]] + return (int64_t)vnegd_s64(a); +} + +//===------------------------------------------------------===// +// 2.1.2.2 Bitwise equal to zero +//===------------------------------------------------------===// // LLVM-LABEL: @test_vceqzd_s64 // CIR-LABEL: @vceqzd_s64 uint64_t test_vceqzd_s64(int64_t a) { @@ -32,15 +45,363 @@ uint64_t test_vceqzd_s64(int64_t a) { return (uint64_t)vceqzd_s64(a); } -// LLVM-LABEL: @test_vnegd_s64 -// CIR-LABEL: @vnegd_s64 -int64_t test_vnegd_s64(int64_t a) { -// CIR: cir.unary(minus, {{.*}}) : !s64 +// LLVM-LABEL: @test_vceqz_s8( +// CIR-LABEL: @vceqz_s8( +uint8x8_t test_vceqz_s8(int8x8_t a) { +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !s8i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !s8i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> +// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]] + return vceqz_s8(a); +} -// LLVM-SAME: i64{{.*}} [[A:%.*]]) -// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]] -// LLVM-NEXT: ret i64 [[VNEGD_I]] - return (int64_t)vnegd_s64(a); +// LLVM-LABEL: @test_vceqz_s16( +// CIR-LABEL: @vceqz_s16( +uint16x4_t 
test_vceqz_s16(int16x4_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !s16i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !s16i>, !cir.vector<4 x !s16i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16> +// LLVM-NEXT: ret <4 x i16> [[VCEQZ_I]] + return vceqz_s16(a); +} + +// LLVM-LABEL: @test_vceqz_s32( +// CIR-LABEL: @vceqz_s32( +uint32x2_t test_vceqz_s32(int32x2_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !s32i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !s32i>, !cir.vector<2 x !s32i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]] + return vceqz_s32(a); +} + +// LLVM-LABEL: @test_vceqz_s64( +// CIR-LABEL: @vceqz_s64( +uint64x1_t test_vceqz_s64(int64x1_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !s64i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !s64i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !s64i>, !cir.vector<1 x !s64i> + +// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x 
i64> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]] + return vceqz_s64(a); +} + +// LLVM-LABEL: @test_vceqz_u64( +// CIR-LABEL: @vceqz_u64( +uint64x1_t test_vceqz_u64(uint64x1_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !u64i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !u64i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !u64i>, !cir.vector<1 x !s64i> + +// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]] + return vceqz_u64(a); +} + +// LLVM-LABEL: @test_vceqz_p64( +// CIR-LABEL: @vceqz_p64( +uint64x1_t test_vceqz_p64(poly64x1_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !s64i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !s64i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !s64i>, !cir.vector<1 x !s64i> + +// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]] + return vceqz_p64(a); +} + +// LLVM-LABEL: @test_vceqzq_s8( +// CIR-LABEL: @vceqzq_s8( +uint8x16_t test_vceqzq_s8(int8x16_t a) { +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !s8i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !s8i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = icmp eq <16 x 
i8> [[A]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> +// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]] + return vceqzq_s8(a); +} + +// LLVM-LABEL: @test_vceqzq_s16( +// CIR-LABEL: @vceqzq_s16( +uint16x8_t test_vceqzq_s16(int16x8_t a) { +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16> +// LLVM-NEXT: ret <8 x i16> [[VCEQZ_I]] + return vceqzq_s16(a); +} + +// LLVM-LABEL: @test_vceqzq_s32( +// CIR-LABEL: @vceqzq_s32( +uint32x4_t test_vceqzq_s32(int32x4_t a) { +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> +// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]] + return vceqzq_s32(a); +} + +// LLVM-LABEL: @test_vceqzq_s64( +// CIR-LABEL: @vceqzq_s64( +uint64x2_t test_vceqzq_s64(int64x2_t a) { +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]] + return vceqzq_s64(a); +} + +// LLVM-LABEL: @test_vceqz_u8( +// CIR-LABEL: @vceqz_u8( +uint8x8_t test_vceqz_u8(uint8x8_t a) { +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !u8i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !u8i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = 
icmp eq <8 x i8> [[A]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> +// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]] + return vceqz_u8(a); +} + +// LLVM-LABEL: @test_vceqz_u16( +// CIR-LABEL: @vceqz_u16( +uint16x4_t test_vceqz_u16(uint16x4_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !u16i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !u16i>, !cir.vector<4 x !s16i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16> +// LLVM-NEXT: ret <4 x i16> [[VCEQZ_I]] + return vceqz_u16(a); +} + +// LLVM-LABEL: @test_vceqz_u32( +// CIR-LABEL: @vceqz_u32( +uint32x2_t test_vceqz_u32(uint32x2_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !u32i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !u32i>, !cir.vector<2 x !s32i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]] + return vceqz_u32(a); +} + +// LLVM-LABEL: @test_vceqzq_u8( +// CIR-LABEL: @vceqzq_u8( +uint8x16_t test_vceqzq_u8(uint8x16_t a) { +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !u8i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !u8i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = icmp eq 
<16 x i8> [[A]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> +// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]] + return vceqzq_u8(a); +} + +// LLVM-LABEL: @test_vceqzq_u16( +// CIR-LABEL: @vceqzq_u16( +uint16x8_t test_vceqzq_u16(uint16x8_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !u16i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16> +// LLVM-NEXT: ret <8 x i16> [[VCEQZ_I]] + return vceqzq_u16(a); +} + +// LLVM-LABEL: @test_vceqzq_u32( +// CIR-LABEL: @vceqzq_u32( +uint32x4_t test_vceqzq_u32(uint32x4_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !u32i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> +// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]] + return vceqzq_u32(a); +} + +// LLVM-LABEL: @test_vceqzq_u64( +// CIR-LABEL: @vceqzq_u64( +uint64x2_t test_vceqzq_u64(uint64x2_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !u64i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !u64i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x 
!u64i>, !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]] + return vceqzq_u64(a); +} + +// LLVM-LABEL: @test_vceqz_f32( +// CIR-LABEL: @vceqz_f32( +uint32x2_t test_vceqz_f32(float32x2_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.float> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !cir.float>, !cir.vector<2 x !s32i> + +// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]] + return vceqz_f32(a); +} + +// LLVM-LABEL: @test_vceqz_f64( +// CIR-LABEL: @vceqz_f64( +uint64x1_t test_vceqz_f64(float64x1_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !cir.double> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !cir.double> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !cir.double>, !cir.vector<1 x !s64i> + +// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 +// LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = 
bitcast <8 x i8> [[TMP1]] to <1 x double> +// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> +// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]] + return vceqz_f64(a); +} + +// LLVM-LABEL: @test_vceqzq_f32( +// CIR-LABEL: @vceqzq_f32( +uint32x4_t test_vceqzq_f32(float32x4_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.float> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !cir.float>, !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> +// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]] + return vceqzq_f32(a); +} + +// LLVM-LABEL: @test_vceqz_p8( +// CIR-LABEL: @vceqz_p8( +uint8x8_t test_vceqz_p8(poly8x8_t a) { +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !s8i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !s8i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> +// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]] + return vceqz_p8(a); +} + +// LLVM-LABEL: @test_vceqzq_p8( +// CIR-LABEL: @vceqzq_p8( +uint8x16_t test_vceqzq_p8(poly8x16_t a) { +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !s8i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !s8i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext 
<16 x i1> [[TMP0]] to <16 x i8> +// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]] + return vceqzq_p8(a); +} + +// LLVM-LABEL: @test_vceqzq_f64( +// CIR-LABEL: @vceqzq_f64( +uint64x2_t test_vceqzq_f64(float64x2_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.double> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !cir.double>, !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> +// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> +// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]] + return vceqzq_f64(a); +} + +// LLVM-LABEL: @test_vceqzq_p64( +// CIR-LABEL: @vceqzq_p64( +uint64x2_t test_vceqzq_p64(poly64x2_t a) { +// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i> +// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !s64i> +// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer +// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]] + return vceqzq_p64(a); } //===------------------------------------------------------===// @@ -51,7 +412,7 @@ int64_t test_vnegd_s64(int64_t a) { int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> 
!cir.vector<8 x !s8i> -// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) +// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]]) // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) // LLVM-NEXT: ret <8 x i8> [[VABD_I]] return vabd_s8(v1, v2); @@ -64,7 +425,7 @@ int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]] -// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) +// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -81,7 +442,7 @@ int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]] -// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) +// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -96,7 +457,7 @@ int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i> -// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) +// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]]) // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x 
i8> [[V2]]) // LLVM-NEXT: ret <8 x i8> [[VABD_I]] return vabd_u8(v1, v2); @@ -109,7 +470,7 @@ uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]] -// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) +// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -126,7 +487,7 @@ uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]] -// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) +// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -143,7 +504,7 @@ float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float> // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]] -// LLVM-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) +// LLVM-SAME: <2 x float> {{.*}} [[V1:%.*]], <2 x float> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32> // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> @@ -162,7 +523,7 @@ float64x1_t test_vabd_f64(float64x1_t v1, float64x1_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> 
-> !cir.vector<1 x !cir.double> // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]] -// LLVM-SAME: <1 x double> noundef [[V1:%.*]], <1 x double> noundef [[V2:%.*]]) +// LLVM-SAME: <1 x double> {{.*}} [[V1:%.*]], <1 x double> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V1]] to i64 // LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V2]] to i64 @@ -181,7 +542,7 @@ float64x1_t test_vabd_f64(float64x1_t v1, float64x1_t v2) { int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i> -// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) +// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]]) // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]]) // LLVM-NEXT: ret <16 x i8> [[VABD_I]] return vabdq_s8(v1, v2); @@ -194,7 +555,7 @@ int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]] -// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) +// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -211,7 +572,7 @@ int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]] -// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) +// 
LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -226,7 +587,7 @@ int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !u8i>, !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i> -// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) +// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]]) // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]]) // LLVM-NEXT: ret <16 x i8> [[VABD_I]] return vabdq_u8(v1, v2); @@ -239,7 +600,7 @@ uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]] -// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) +// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -256,7 +617,7 @@ uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i> // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]] -// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) +// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> // LLVM-NEXT: 
[[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -273,7 +634,7 @@ float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float> // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]] -// LLVM-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) +// LLVM-SAME: <4 x float> {{.*}} [[V1:%.*]], <4 x float> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32> // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> @@ -292,7 +653,7 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double> // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]] -// LLVM-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) +// LLVM-SAME: <2 x double> {{.*}} [[V1:%.*]], <2 x double> {{.*}} [[V2:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64> // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> @@ -321,7 +682,7 @@ uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabd_u8 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) +// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]], <8 x i8> {{.*}} [[V3:%.*]]) // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]]) // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]] // LLVM-NEXT: ret <8 x i8> [[ADD_I]] @@ -334,7 +695,7 @@ uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabd_u16 // CIR: 
[[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) +// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]], <4 x i16> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -351,7 +712,7 @@ uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabd_u32 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) +// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]], <2 x i32> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -368,7 +729,7 @@ int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabd_s8 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) +// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]], <8 x i8> {{.*}} [[V3:%.*]]) // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]]) // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]] // LLVM-NEXT: ret <8 x i8> [[ADD_I]] @@ -381,7 +742,7 @@ int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabd_s16 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) +// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]], <4 x i16> {{.*}} [[V3:%.*]]) // LLVM: 
[[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -398,7 +759,7 @@ int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabd_s32 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) +// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]], <2 x i32> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -415,7 +776,7 @@ int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabdq_s8 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) +// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]]) // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]]) // LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]] // LLVM-NEXT: ret <16 x i8> [[ADD_I]] @@ -428,7 +789,7 @@ int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabdq_s16 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) +// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -445,7 +806,7 @@ int32x4_t test_vabaq_s32(int32x4_t v1, 
int32x4_t v2, int32x4_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabdq_s32 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) +// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -462,7 +823,7 @@ uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabdq_u8 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) +// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]]) // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]]) // LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]] // LLVM-NEXT: ret <16 x i8> [[ADD_I]] @@ -475,7 +836,7 @@ uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabdq_u16 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) +// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -492,7 +853,7 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { // CIR: [[ABD:%.*]] = cir.call @vabdq_u32 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]] -// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) +// 
LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]]) // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8> // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c index c8bc7d0f0456..9b7011f36f52 100644 --- a/clang/test/CodeGen/amdgpu-abi-version.c +++ b/clang/test/CodeGen/amdgpu-abi-version.c @@ -1,29 +1,48 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 -// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s +// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck -check-prefixes=CHECK,LLVM %s +// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa-llvm -emit-llvm -mcode-object-version=none %s -o - | FileCheck -check-prefixes=CHECK,LLVMENV %s //. -// CHECK: @__oclc_ABI_version = external addrspace(4) global i32 +// LLVM: @__oclc_ABI_version = external addrspace(4) global i32 //. 
-// CHECK-LABEL: define dso_local i32 @foo( -// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500 -// CHECK-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12 -// CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4 -// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]] -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]] -// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// LLVM-LABEL: define dso_local i32 @foo( +// LLVM-SAME: ) #[[ATTR0:[0-9]+]] { +// LLVM-NEXT: [[ENTRY:.*:]] +// LLVM-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4 +// LLVM-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500 +// LLVM-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// LLVM-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12 +// LLVM-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// LLVM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4 +// LLVM-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]] +// LLVM-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]] +// LLVM-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32 +// LLVM-NEXT: ret i32 [[CONV]] +// +// LLVMENV-LABEL: 
define dso_local i32 @foo( +// LLVMENV-SAME: ) #[[ATTR0:[0-9]+]] { +// LLVMENV-NEXT: [[ENTRY:.*:]] +// LLVMENV-NEXT: [[TMP0:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// LLVMENV-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12 +// LLVMENV-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]] +// LLVMENV-NEXT: [[CONV:%.*]] = zext i16 [[TMP2]] to i32 +// LLVMENV-NEXT: ret i32 [[CONV]] // int foo() { return __builtin_amdgcn_workgroup_size_x(); } //. -// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +// LLVM: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// LLVM: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +//. +// LLVMENV: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// LLVMENV: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +//. +// LLVM: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// LLVM: [[RNG1]] = !{i16 1, i16 1025} +// LLVM: [[META2]] = !{} //. -// CHECK: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// CHECK: [[RNG1]] = !{i16 1, i16 1025} -// CHECK: [[META2]] = !{} +// LLVMENV: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// LLVMENV: [[RNG1]] = !{i16 1, i16 1025} +// LLVMENV: [[META2]] = !{} //. +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl b/clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl new file mode 100644 index 000000000000..54d428285d88 --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl @@ -0,0 +1,183 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -emit-llvm -disable-llvm-passes -finclude-default-header -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL +// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -finclude-default-header -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPIRV + +// DXIL: %"class.hlsl::Texture2D" = type { target("dx.Texture", <4 x float>, 0, 0, 0, 2) } +// DXIL: %"class.hlsl::SamplerState" = type { target("dx.Sampler", 0) } +// DXIL: %"class.hlsl::SamplerComparisonState" = type { target("dx.Sampler", 0) } + +// SPIRV: %"class.hlsl::Texture2D" = type { target("spirv.Image", float, 1, 2, 0, 0, 1, 0) } +// SPIRV: %"class.hlsl::SamplerState" = type { target("spirv.Sampler") } +// SPIRV: %"class.hlsl::SamplerComparisonState" = type { target("spirv.Sampler") } + +Texture2D<float4> t; +SamplerState s; +SamplerComparisonState sc; + +// CHECK: define hidden {{.*}} <4 x float> @main(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}) +// CHECK: ret <4 x float> %[[CALL]] + +float4 main(float2 loc : LOC) : SV_Target { + return t.Gather(s, loc); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} 
%[[COORD:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer) +// CHECK: ret <4 x float> %[[RES]] + +// CHECK: define hidden {{.*}} <4 x float> @test_offset(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2], int vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}, <2 x i32> {{.*}} <i32 1, i32 2>) +// CHECK: ret <4 x float> %[[CALL]] + +float4 test_offset(float2 loc : LOC) : SV_Target { + return t.Gather(s, loc, int2(1, 2)); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float 
vector[4]>::Gather(hlsl::SamplerState, float vector[2], int vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^,]+]], <2 x i32> {{.*}} %[[OFFSET:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: %[[OFFSET_ADDR:.*]] = alloca <2 x i32> +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: store <2 x i32> %[[OFFSET]], ptr %[[OFFSET_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// CHECK: %[[OFFSET_VAL:.*]] = load <2 x i32>, ptr %[[OFFSET_ADDR]] +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> %[[OFFSET_VAL]]) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> %[[OFFSET_VAL]]) +// CHECK: ret <4 x float> %[[RES]] + +// CHECK: define hidden {{.*}} <4 x float> @test_green(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float 
vector[4]>::GatherGreen(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}) +// CHECK: ret <4 x float> %[[CALL]] + +float4 test_green(float2 loc : LOC) : SV_Target { + return t.GatherGreen(s, loc); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherGreen(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 1, <2 x i32> zeroinitializer) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 1, <2 x i32> zeroinitializer) +// CHECK: ret <4 x float> %[[RES]] + +// CHECK: define hidden {{.*}} <4 x float> @test_red(float vector[2])(<2 x float> noundef 
nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherRed(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}) +// CHECK: ret <4 x float> %[[CALL]] + +float4 test_red(float2 loc : LOC) : SV_Target { + return t.GatherRed(s, loc); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherRed(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer) +// CHECK: ret <4 x float> %[[RES]] 
+ +// CHECK: define hidden {{.*}} <4 x float> @test_blue(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherBlue(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}) +// CHECK: ret <4 x float> %[[CALL]] + +float4 test_blue(float2 loc : LOC) : SV_Target { + return t.GatherBlue(s, loc); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherBlue(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 2, <2 x i32> zeroinitializer) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], 
<2 x float> %[[COORD_VAL]], i32 2, <2 x i32> zeroinitializer) +// CHECK: ret <4 x float> %[[RES]] + +// CHECK: define hidden {{.*}} <4 x float> @test_alpha(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherAlpha(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}) +// CHECK: ret <4 x float> %[[CALL]] + +float4 test_alpha(float2 loc : LOC) : SV_Target { + return t.GatherAlpha(s, loc); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherAlpha(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 3, <2 x i32> zeroinitializer) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> 
@llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 3, <2 x i32> zeroinitializer) +// CHECK: ret <4 x float> %[[RES]] + +// CHECK: define hidden {{.*}} <4 x float> @test_cmp(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]]) +// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherCmp(hlsl::SamplerComparisonState, float vector[2], float)(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerComparisonState") {{.*}}, <2 x float> {{.*}} %{{.*}}, float {{.*}} 5.000000e-01) +// CHECK: ret <4 x float> %[[CALL]] + +float4 test_cmp(float2 loc : LOC) : SV_Target { + return t.GatherCmp(sc, loc, 0.5); +} + +// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherCmp(hlsl::SamplerComparisonState, float vector[2], float)(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerComparisonState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^,]+]], float {{.*}} %[[CMP:[^)]+]]) +// CHECK: %[[THIS_ADDR:.*]] = alloca ptr +// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float> +// CHECK: %[[CMP_ADDR:.*]] = alloca float +// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]] +// CHECK: store float %[[CMP]], ptr %[[CMP_ADDR]] +// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]] +// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0 +// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]] +// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerComparisonState", ptr %[[SAMPLER]], i32 0, i32 0 +// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]] +// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]] +// CHECK: %[[CMP_VAL:.*]] = 
load float, ptr %[[CMP_ADDR]] +// CHECK: %[[CONV:.*]] = fpext {{.*}} float %[[CMP_VAL]] to double +// CHECK: %[[TRUNC:.*]] = fptrunc {{.*}} double %[[CONV]] to float +// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.cmp.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], float %[[TRUNC]], i32 0, <2 x i32> zeroinitializer) +// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.cmp.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], float %[[TRUNC]], <2 x i32> zeroinitializer) +// CHECK: ret <4 x float> %[[RES]] diff --git a/clang/test/CodeGenSYCL/function-attrs.cpp b/clang/test/CodeGenSYCL/function-attrs.cpp index 4c55cf34aa0a..60d3cf10055e 100644 --- a/clang/test/CodeGenSYCL/function-attrs.cpp +++ b/clang/test/CodeGenSYCL/function-attrs.cpp @@ -26,6 +26,9 @@ int foo() { return 1; } +template <typename Name, typename... Ts> +void sycl_kernel_launch(Ts...) 
{} + template <typename Name, typename Func> [[clang::sycl_kernel_entry_point(Name)]] void kernel_single_task(const Func &kernelFunc) { kernelFunc(); diff --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp index 67b53f3ae81c..47c2c45ae774 100644 --- a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp +++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp @@ -2,31 +2,36 @@ // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s +// RUN: %clang_cc1 
-fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s // RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-pc-windows-msvc -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s 
-o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s // RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-uefi -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s // RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown 
-std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s -// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s - -// Test the generation of SYCL kernel caller functions. These functions are -// generated from functions declared with the sycl_kernel_entry_point attribute -// and emited during device compilation. They are not emitted during device -// compilation. +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s +// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s + +// Test code generation for functions declared with the sycl_kernel_entry_point +// attribute. During host compilation, the bodies of such functions are replaced +// with calls to a function template or variable template (with suitable call +// operator) named sycl_kernel_launch. During device compilation, the bodies of +// these functions are used to generate offload kernel entry points (SYCL kernel +// caller functions). + +template <typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) 
{} struct single_purpose_kernel_name; struct single_purpose_kernel { @@ -44,57 +49,169 @@ void kernel_single_task(KernelType kernelFunc) { kernelFunc(42); } +// Exercise code gen with kernel name types named with esoteric characters. +struct \u03b4\u03c4\u03c7; // Delta Tau Chi (δτχ) + +class handler { + template <typename KernelName, typename... Ts> + void sycl_kernel_launch(const char *, Ts...) {} +public: + template <typename KernelName, typename KernelType> + [[clang::sycl_kernel_entry_point(KernelName)]] + void kernel_entry_point(KernelType k, int a, int b) { + k(a, b); + } +}; + +struct copyable { + int i; + ~copyable(); +}; + int main() { single_purpose_kernel obj; single_purpose_kernel_task(obj); int capture; auto lambda = [=](auto) { (void) capture; }; kernel_single_task<decltype(lambda)>(lambda); + kernel_single_task<\u03b4\u03c4\u03c7>([](int){}); + handler h; + copyable c{42}; + h.kernel_entry_point<struct KN>([=] (int a, int b) { return c.i + a + b; }, 1, 2); } // Verify that SYCL kernel caller functions are not emitted during host // compilation. // -// CHECK-HOST-NOT: _ZTS26single_purpose_kernel_name -// CHECK-HOST-NOT: _ZTSZ4mainE18lambda_kernel_name +// CHECK-HOST-NOT: define {{.*}} @_ZTS26single_purpose_kernel_name +// CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainEUlT_E_ +// CHECK-HOST-NOT: define {{.*}} @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainE2KN // Verify that sycl_kernel_entry_point attributed functions are not emitted // during device compilation. // // CHECK-DEVICE-NOT: single_purpose_kernel_task // CHECK-DEVICE-NOT: kernel_single_task +// CHECK-DEVICE-NOT: kernel_entry_point -// Verify that no code is generated for the bodies of sycl_kernel_entry_point -// attributed functions during host compilation. ODR-use of these functions may -// require them to be emitted, but they have no effect if called. 
+// Verify that kernel launch code is generated for sycl_kernel_entry_point +// attributed functions during host compilation. +// +// CHECK-HOST-LINUX: @.str = private unnamed_addr constant [33 x i8] c"_ZTS26single_purpose_kernel_name\00", align 1 +// CHECK-HOST-LINUX: @.str.1 = private unnamed_addr constant [18 x i8] c"_ZTSZ4mainEUlT_E_\00", align 1 +// CHECK-HOST-LINUX: @.str.2 = private unnamed_addr constant [12 x i8] c"_ZTS6\CE\B4\CF\84\CF\87\00", align 1 // // CHECK-HOST-LINUX: define dso_local void @_Z26single_purpose_kernel_task21single_purpose_kernel() #{{[0-9]+}} { // CHECK-HOST-LINUX-NEXT: entry: // CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-LINUX-NEXT: call void @_Z18sycl_kernel_launchI26single_purpose_kernel_nameJ21single_purpose_kernelEEvPKcDpT0_(ptr noundef @.str) // CHECK-HOST-LINUX-NEXT: ret void // CHECK-HOST-LINUX-NEXT: } // // CHECK-HOST-LINUX: define internal void @_Z18kernel_single_taskIZ4mainEUlT_E_S1_EvT0_(i32 %kernelFunc.coerce) #{{[0-9]+}} { // CHECK-HOST-LINUX-NEXT: entry: // CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon, align 4 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon, align 4 // CHECK-HOST-LINUX-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0 // CHECK-HOST-LINUX-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %kernelFunc, i64 4, i1 false) +// CHECK-HOST-LINUX-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-LINUX-NEXT: %0 = load i32, ptr %coerce.dive1, align 4 +// CHECK-HOST-LINUX-NEXT: call void @_Z18sycl_kernel_launchIZ4mainEUlT_E_JS1_EEvPKcDpT0_(ptr noundef @.str.1, i32 %0) // CHECK-HOST-LINUX-NEXT: ret void // CHECK-HOST-LINUX-NEXT: } // +// CHECK-HOST-LINUX: define 
internal void @"_Z18kernel_single_taskI6\CE\B4\CF\84\CF\87Z4mainEUliE_EvT0_"() #{{[0-9]+}} { +// CHECK-HOST-LINUX-NEXT: entry: +// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon.0, align 1 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.0, align 1 +// CHECK-HOST-LINUX-NEXT: call void @"_Z18sycl_kernel_launchI6\CE\B4\CF\84\CF\87JZ4mainEUliE_EEvPKcDpT0_"(ptr noundef @.str.2) +// CHECK-HOST-LINUX-NEXT: ret void +// CHECK-HOST-LINUX-NEXT: } + + +// CHECK-HOST-LINUX: define internal void @_ZN7handler18kernel_entry_pointIZ4mainE2KNZ4mainEUliiE_EEvT0_ii(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %k, i32 noundef %a, i32 noundef %b) #{{[0-9]+}} align 2 { +// CHECK-HOST-LINUX-NEXT: entry: +// CHECK-HOST-LINUX-NEXT: %this.addr = alloca ptr, align 8 +// CHECK-HOST-LINUX-NEXT: %k.indirect_addr = alloca ptr, align 8 +// CHECK-HOST-LINUX-NEXT: %a.addr = alloca i32, align 4 +// CHECK-HOST-LINUX-NEXT: %b.addr = alloca i32, align 4 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.1, align 4 +// CHECK-HOST-LINUX-NEXT: store ptr %this, ptr %this.addr, align 8 +// CHECK-HOST-LINUX-NEXT: store ptr %k, ptr %k.indirect_addr, align 8 +// CHECK-HOST-LINUX-NEXT: store i32 %a, ptr %a.addr, align 4 +// CHECK-HOST-LINUX-NEXT: store i32 %b, ptr %b.addr, align 4 +// CHECK-HOST-LINUX-NEXT: %this1 = load ptr, ptr %this.addr, align 8 +// CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %k, i64 4, i1 false) +// CHECK-HOST-LINUX-NEXT: %0 = load i32, ptr %a.addr, align 4 +// CHECK-HOST-LINUX-NEXT: %1 = load i32, ptr %b.addr, align 4 +// CHECK-HOST-LINUX-NEXT: call void @_ZN7handler18sycl_kernel_launchIZ4mainE2KNJZ4mainEUliiE_iiEEEvPKcDpT0_(ptr noundef nonnull align 1 dereferenceable(1) %this1, ptr noundef @.str.3, ptr noundef %agg.tmp, i32 noundef %0, i32 noundef %1) +// CHECK-HOST-LINUX-NEXT: call void @_ZZ4mainENUliiE_D1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) %agg.tmp) #{{[0-9]+}} +// 
CHECK-HOST-LINUX-NEXT: ret void +// CHECK-HOST-LINUX-NEXT: } + // CHECK-HOST-WINDOWS: define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} { // CHECK-HOST-WINDOWS-NEXT: entry: // CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %struct.single_purpose_kernel, align 1 // CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %kernelFunc, i32 0, i32 0 // CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i8, ptr %coerce.dive1, align 1 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@Usingle_purpose_kernel_name@@Usingle_purpose_kernel@@@@YAXPEBDUsingle_purpose_kernel@@@Z"(ptr noundef @"??_C@_0CB@KFIJOMLB@_ZTS26single_purpose_kernel_name@", i8 %0) // CHECK-HOST-WINDOWS-NEXT: ret void // CHECK-HOST-WINDOWS-NEXT: } // // CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@V<lambda_1>@?0??main@@9@V1?0??2@9@@@YAXV<lambda_1>@?0??main@@9@@Z"(i32 %kernelFunc.coerce) #{{[0-9]+}} { // CHECK-HOST-WINDOWS-NEXT: entry: // CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon, align 4 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon, align 4 // CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0 // CHECK-HOST-WINDOWS-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %kernelFunc, i64 4, i1 false) +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %coerce.dive1, align 4 +// 
CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@V<lambda_1>@?0??main@@9@V1?0??2@9@@@YAXPEBDV<lambda_1>@?0??main@@9@@Z"(ptr noundef @"??_C@_0BC@NHCDOLAA@_ZTSZ4mainEUlT_E_?$AA@", i32 %0) +// +// CHECK-HOST-WINDOWS-NEXT: ret void +// CHECK-HOST-WINDOWS-NEXT: } +// +// CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@U\CE\B4\CF\84\CF\87@@V<lambda_2>@?0??main@@9@@@YAXV<lambda_2>@?0??main@@9@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} { +// CHECK-HOST-WINDOWS-NEXT: entry: +// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon.0, align 1 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.0, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.0, ptr %kernelFunc, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon.0, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i8, ptr %coerce.dive1, align 1 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@U\CE\B4\CF\84\CF\87@@V<lambda_2>@?0??main@@9@@@YAXPEBDV<lambda_2>@?0??main@@9@@Z"(ptr noundef @"??_C@_0M@BCGAEMBE@_ZTS6?N?$LE?O?$IE?O?$IH?$AA@", i8 %0) +// CHECK-HOST-WINDOWS-NEXT: ret void +// CHECK-HOST-WINDOWS-NEXT: } + +// CHECK-HOST-WINDOWS: define internal void @"??$kernel_entry_point@UKN@?1??main@@9@V<lambda_3>@?0??2@9@@handler@@QEAAXV<lambda_3>@?0??main@@9@HH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this, i32 %k.coerce, i32 noundef %a, i32 noundef %b) #{{[0-9]+}} align 2 +// CHECK-HOST-WINDOWS-NEXT: entry: +// CHECK-HOST-WINDOWS-NEXT: %k = alloca %class.anon.1, align 4 +// CHECK-HOST-WINDOWS-NEXT: %b.addr = alloca i32, align 4 +// CHECK-HOST-WINDOWS-NEXT: %a.addr = alloca i32, align 4 +// CHECK-HOST-WINDOWS-NEXT: %this.addr = alloca ptr, align 8 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.1, align 4 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw 
%class.anon.1, ptr %k, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.copyable, ptr %coerce.dive, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: store i32 %k.coerce, ptr %coerce.dive1, align 4 +// CHECK-HOST-WINDOWS-NEXT: store i32 %b, ptr %b.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: store i32 %a, ptr %a.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: store ptr %this, ptr %this.addr, align 8 +// CHECK-HOST-WINDOWS-NEXT: %this2 = load ptr, ptr %this.addr, align 8 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %b.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: %1 = load i32, ptr %a.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %k, i64 4, i1 false) +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive3 = getelementptr inbounds nuw %class.anon.1, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive4 = getelementptr inbounds nuw %struct.copyable, ptr %coerce.dive3, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %2 = load i32, ptr %coerce.dive4, align 4 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@UKN@?1??main@@9@V<lambda_3>@?0??2@9@HH@handler@@AEAAXPEBDV<lambda_3>@?0??main@@9@HH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this2, ptr noundef @"??_C@_0P@DLGHPODL@_ZTSZ4mainE2KN?$AA@", i32 %2, i32 noundef %1, i32 noundef %0) +// CHECK-HOST-WINDOWS-NEXT: call void @"??1<lambda_3>@?0??main@@9@QEAA@XZ"(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) %k) #{{[0-9]+}} // CHECK-HOST-WINDOWS-NEXT: ret void // CHECK-HOST-WINDOWS-NEXT: } @@ -179,6 +296,122 @@ int main() { // CHECK-SPIR-NEXT: } // CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// IR for the SYCL kernel caller function generated for kernel_single_task with +// the Delta Tau Chi type as the SYCL kernel name type. 
+// +// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-AMDGCN-SAME: (ptr addrspace(4) noundef byref(%class.anon.0) align 1 %0) #[[AMDGCN_ATTR0]] { +// CHECK-AMDGCN-NEXT: entry: +// CHECK-AMDGCN-NEXT: %coerce = alloca %class.anon.0, align 1, addrspace(5) +// CHECK-AMDGCN-NEXT: %kernelFunc = addrspacecast ptr addrspace(5) %coerce to ptr +// CHECK-AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 1 %kernelFunc, ptr addrspace(4) align 1 %0, i64 1, i1 false) +// CHECK-AMDGCN-NEXT: call void @_ZZ4mainENKUliE_clEi +// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc, i32 noundef 42) #[[AMDGCN_ATTR1:[0-9]+]] +// CHECK-AMDGCN-NEXT: ret void +// CHECK-AMDGCN-NEXT: } +// CHECK-AMDGCN: define internal void @_ZZ4mainENKUliE_clEi +// +// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-NVPTX-SAME: (ptr noundef byval(%class.anon.0) align 1 %kernelFunc) #[[NVPTX_ATTR0:[0-9]+]] { +// CHECK-NVPTX-NEXT: entry: +// CHECK-NVPTX-NEXT: call void @_ZZ4mainENKUliE_clEi +// CHECK-NVPTX-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc, i32 noundef 42) #[[NVPTX_ATTR1:[0-9]+]] +// CHECK-NVPTX-NEXT: ret void +// CHECK-NVPTX-NEXT: } +// CHECK-NVPTX: define internal void @_ZZ4mainENKUliE_clEi +// +// CHECK-SPIR: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-SPIR-NEXT: define {{[a-z_ ]*}}spir_kernel void @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-SPIR-SAME: (ptr noundef byval(%class.anon.0) align 1 %kernelFunc) #[[SPIR_ATTR0:[0-9]+]] { +// CHECK-SPIR-NEXT: entry: +// CHECK-SPIR-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4) +// CHECK-SPIR-NEXT: call spir_func void @_ZZ4mainENKUliE_clEi +// CHECK-SPIR-SAME: (ptr 
addrspace(4) noundef align 1 dereferenceable_or_null(1) %kernelFunc.ascast, i32 noundef 42) #[[SPIR_ATTR1:[0-9]+]] +// CHECK-SPIR-NEXT: ret void +// CHECK-SPIR-NEXT: } +// CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUliE_clEi + +// IR for the SYCL kernel caller function generated for +// handler::kernel_entry_point with main::KN as the SYCL kernel name type. +// +// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @_ZTSZ4mainE2KN +// CHECK-AMDGCN-SAME: (i32 %k.coerce, i32 noundef %a, i32 noundef %b) #[[AMDGCN_ATTR0]] { +// CHECK-AMDGCN-NEXT: entry: +// CHECK-AMDGCN-NEXT: %k = alloca %class.anon.1, align 4, addrspace(5) +// CHECK-AMDGCN-NEXT: %a.addr = alloca i32, align 4, addrspace(5) +// CHECK-AMDGCN-NEXT: %b.addr = alloca i32, align 4, addrspace(5) +// CHECK-AMDGCN-NEXT: %k2 = addrspacecast ptr addrspace(5) %k to ptr +// CHECK-AMDGCN-NEXT: %a.addr.ascast = addrspacecast ptr addrspace(5) %a.addr to ptr +// CHECK-AMDGCN-NEXT: %b.addr.ascast = addrspacecast ptr addrspace(5) %b.addr to ptr +// CHECK-AMDGCN-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.1, ptr %k2, i32 0, i32 0 +// CHECK-AMDGCN-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.copyable, ptr %coerce.dive, i32 0, i32 0 +// CHECK-AMDGCN-NEXT: store i32 %k.coerce, ptr %coerce.dive1, align 4 +// CHECK-AMDGCN-NEXT: store i32 %a, ptr %a.addr.ascast, align 4 +// CHECK-AMDGCN-NEXT: store i32 %b, ptr %b.addr.ascast, align 4 +// CHECK-AMDGCN-NEXT: %0 = load i32, ptr %a.addr.ascast, align 4 +// CHECK-AMDGCN-NEXT: %1 = load i32, ptr %b.addr.ascast, align 4 +// CHECK-AMDGCN-NEXT: %call = call noundef i32 @_ZZ4mainENKUliiE_clEii +// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 4 dereferenceable(4) %k2, i32 noundef %0, i32 noundef %1) #[[AMDGCN_ATTR1:[0-9]+]] +// CHECK-AMDGCN-NEXT: ret void +// CHECK-AMDGCN-NEXT: } +// +// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline 
norecurse nounwind optnone +// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @_ZTSZ4mainE2KN +// CHECK-NVPTX-SAME: (ptr noundef byval(%class.anon.1) align 4 %k, i32 noundef %a, i32 noundef %b) #[[NVPTX_ATTR0:[0-9]+]] { +// CHECK-NVPTX-NEXT: entry: +// CHECK-NVPTX-NEXT: %a.addr = alloca i32, align 4 +// CHECK-NVPTX-NEXT: %b.addr = alloca i32, align 4 +// CHECK-NVPTX-NEXT: store i32 %a, ptr %a.addr, align 4 +// CHECK-NVPTX-NEXT: store i32 %b, ptr %b.addr, align 4 +// CHECK-NVPTX-NEXT: %0 = load i32, ptr %a.addr, align 4 +// CHECK-NVPTX-NEXT: %1 = load i32, ptr %b.addr, align 4 +// CHECK-NVPTX-NEXT: %call = call noundef i32 @_ZZ4mainENKUliiE_clEii +// CHECK-NVPTX-SAME: (ptr noundef nonnull align 4 dereferenceable(4) %k, i32 noundef %0, i32 noundef %1) #[[NVPTX_ATTR1:[0-9]+]] +// CHECK-NVPTX-NEXT: ret void +// CHECK-NVPTX-NEXT: } +// +// CHECK-SPIRNV: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-SPIRNV-NEXT: define dso_local spir_kernel void @_ZTSZ4mainE2KN +// CHECK-SPIRNV-SAME: (ptr noundef %k, i32 noundef %a, i32 noundef %b) #[[SPIR_ATTR0:[0-9]+]] { +// CHECK-SPIRNV-NEXT: entry: +// CHECK-SPIRNV-NEXT: %k.indirect_addr = alloca ptr addrspace(4), align {{[48]}} +// CHECK-SPIRNV-NEXT: %a.addr = alloca i32, align 4 +// CHECK-SPIRNV-NEXT: %b.addr = alloca i32, align 4 +// CHECK-SPIRNV-NEXT: %k.indirect_addr.ascast = addrspacecast ptr %k.indirect_addr to ptr addrspace(4) +// CHECK-SPIRNV-NEXT: %a.addr.ascast = addrspacecast ptr %a.addr to ptr addrspace(4) +// CHECK-SPIRNV-NEXT: %b.addr.ascast = addrspacecast ptr %b.addr to ptr addrspace(4) +// CHECK-SPIRNV-NEXT: store ptr %k, ptr addrspace(4) %k.indirect_addr.ascast, align {{[48]}} +// CHECK-SPIRNV-NEXT: %k.ascast = addrspacecast ptr %k to ptr addrspace(4) +// CHECK-SPIRNV-NEXT: store i32 %a, ptr addrspace(4) %a.addr.ascast, align 4 +// CHECK-SPIRNV-NEXT: store i32 %b, ptr addrspace(4) %b.addr.ascast, align 4 +// CHECK-SPIRNV-NEXT: %0 = load i32, ptr addrspace(4) 
%a.addr.ascast, align 4 +// CHECK-SPIRNV-NEXT: %1 = load i32, ptr addrspace(4) %b.addr.ascast, align 4 +// CHECK-SPIRNV-NEXT: %call = call spir_func noundef i32 @_ZZ4mainENKUliiE_clEii +// CHECK-SPIRNV-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %k.ascast, i32 noundef %0, i32 noundef %1) #[[SPIR_ATTR1:[0-9]+]] +// CHECK-SPIRNV-NEXT: ret void +// CHECK-SPIRNV-NEXT: } +// +// CHECK-SPIRV: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-SPIRV-NEXT: define spir_kernel void @_ZTSZ4mainE2KN +// CHECK-SPIRV-SAME: (ptr noundef byval(%class.anon.1) align 4 %k, i32 noundef %a, i32 noundef %b) #[[SPIR_ATTR0:[0-9]+]] { +// CHECK-SPIRV-NEXT: entry: +// CHECK-SPIRV-NEXT: %a.addr = alloca i32, align 4 +// CHECK-SPIRV-NEXT: %b.addr = alloca i32, align 4 +// CHECK-SPIRV-NEXT: %a.addr.ascast = addrspacecast ptr %a.addr to ptr addrspace(4) +// CHECK-SPIRV-NEXT: %b.addr.ascast = addrspacecast ptr %b.addr to ptr addrspace(4) +// CHECK-SPIRV-NEXT: %k.ascast = addrspacecast ptr %k to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store i32 %a, ptr addrspace(4) %a.addr.ascast, align 4 +// CHECK-SPIRV-NEXT: store i32 %b, ptr addrspace(4) %b.addr.ascast, align 4 +// CHECK-SPIRV-NEXT: %0 = load i32, ptr addrspace(4) %a.addr.ascast, align 4 +// CHECK-SPIRV-NEXT: %1 = load i32, ptr addrspace(4) %b.addr.ascast, align 4 +// CHECK-SPIRV-NEXT: %call = call spir_func noundef i32 @_ZZ4mainENKUliiE_clEii +// CHECK-SPIRV-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %k.ascast, i32 noundef %0, i32 noundef %1) #[[SPIR_ATTR1:[0-9]+]] +// CHECK-SPIRV-NEXT: ret void +// CHECK-SPIRV-NEXT: } + // CHECK-AMDGCN: #[[AMDGCN_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CHECK-AMDGCN: #[[AMDGCN_ATTR1]] = { convergent nounwind } // diff --git a/clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp 
b/clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp new file mode 100644 index 000000000000..8fe7a148a2f6 --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fcxx-exceptions -fexceptions -fsycl-is-host -emit-llvm -o - %s | FileCheck %s + +// Validate generation of exception handling code for functions declared +// with the sycl_kernel_entry_point attribute that implicitly call a +// sycl_kernel_launch function that may throw an exception. Exception +// handling is not relevant for the generated offload kernel entry point +// function, so device compilation is intentionally not exercised. + +// A unique kernel name type is required for each declared kernel entry point. +template<int> struct KN; + +// A generic kernel object type. +template<int, int = 0> +struct KT { + void operator()() const; +}; + + +// Validate that exception handling instructions are omitted when a +// potentially throwing sycl_kernel_entry_point attributed function +// calls a potentially throwing sycl_kernel_launch function (a thrown +// exception will propagate with no explicit handling required). +namespace ns1 { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + [[clang::sycl_kernel_entry_point(KN<1>)]] + void skep(KT<1> k) { + k(); + } +} +// CHECK: ; Function Attrs: mustprogress noinline optnone +// CHECK: define dso_local void @_ZN3ns14skepE2KTILi1ELi0EE() #{{[0-9]+}} { +// CHECK: call void @_ZN3ns118sycl_kernel_launchI2KNILi1EEJ2KTILi1ELi0EEEEEvPKcDpT0_(ptr noundef @.str) +// CHECK: ret void +// CHECK: } + + +// Validate that exception handling instructions are emitted when a +// non-throwing sycl_kernel_entry_point attributed function calls +// a potentially throwing sycl_kernel_launch function. +namespace ns2 { + template<typename KN, typename... 
Ts> + void sycl_kernel_launch(const char *, Ts...); + [[clang::sycl_kernel_entry_point(KN<2>)]] + void skep(KT<2> k) noexcept { + k(); + } +} +// CHECK: ; Function Attrs: mustprogress noinline nounwind optnone +// CHECK: define dso_local void @_ZN3ns24skepE2KTILi2ELi0EE() #{{[0-9]+}} personality ptr @__gxx_personality_v0 { +// CHECK: invoke void @_ZN3ns218sycl_kernel_launchI2KNILi2EEJ2KTILi2ELi0EEEEEvPKcDpT0_(ptr noundef @.str.1) +// CHECK: to label %invoke.cont unwind label %terminate.lpad +// CHECK: invoke.cont: +// CHECK: ret void +// CHECK: terminate.lpad: +// CHECK: call void @__clang_call_terminate(ptr %1) #{{[0-9]+}} +// CHECK: unreachable +// CHECK: } + + +// Validate that exception handling instructions are omitted when a +// potentially throwing sycl_kernel_entry_point attributed function +// calls a non-throwing sycl_kernel_launch function (a thrown +// exception will terminate within sycl_kernel_launch). +namespace ns3 { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...) noexcept; + [[clang::sycl_kernel_entry_point(KN<3>)]] + void skep(KT<3> k) { + k(); + } +} +// CHECK: ; Function Attrs: mustprogress noinline nounwind optnone +// CHECK: define dso_local void @_ZN3ns34skepE2KTILi3ELi0EE() #{{[0-9]+}} { +// CHECK: call void @_ZN3ns318sycl_kernel_launchI2KNILi3EEJ2KTILi3ELi0EEEEEvPKcDpT0_(ptr noundef @.str.2) +// CHECK: ret void +// CHECK: } + + +// Validate that exception handling instructions are omitted when a +// non-throwing sycl_kernel_entry_point attributed function calls a +// non-throwing sycl_kernel_launch function. +namespace ns4 { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...) 
noexcept; + [[clang::sycl_kernel_entry_point(KN<4>)]] + void skep(KT<4> k) noexcept { + k(); + } +} +// CHECK: ; Function Attrs: mustprogress noinline nounwind optnone +// CHECK: define dso_local void @_ZN3ns44skepE2KTILi4ELi0EE() #{{[0-9]+}} { +// CHECK: call void @_ZN3ns418sycl_kernel_launchI2KNILi4EEJ2KTILi4ELi0EEEEEvPKcDpT0_(ptr noundef @.str.3) +// CHECK: ret void +// CHECK: } diff --git a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp index 14366a092a1f..c298593e2f1a 100644 --- a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp +++ b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp @@ -1,6 +1,8 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s '-D$ADDRSPACE=addrspace(1) ' // RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-host -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s '-D$ADDRSPACE=' +template<typename KN, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) 
{} template<typename KN, typename Func> [[clang::sycl_kernel_entry_point(KN)]] void kernel(Func F){ diff --git a/clang/test/Driver/empty_arg.c b/clang/test/Driver/empty_arg.c new file mode 100644 index 000000000000..94ed8f13cbec --- /dev/null +++ b/clang/test/Driver/empty_arg.c @@ -0,0 +1,2 @@ +// RUN: not %clang -- "" 2>&1 | FileCheck %s +// CHECK: error: no such file or directory: '' diff --git a/clang/test/Modules/auto-module-import.m b/clang/test/Modules/auto-module-import.m index cfbb28fa20e6..578d41bfba50 100644 --- a/clang/test/Modules/auto-module-import.m +++ b/clang/test/Modules/auto-module-import.m @@ -96,6 +96,6 @@ namespace NS { // expected-note {{begins here}} } extern "C" { // expected-note {{begins here}} #include <NoUmbrella/A.h> // expected-remark {{treating #include as an import}} \ - expected-error {{import of C++ module 'NoUmbrella.A' appears within extern "C"}} + expected-warning {{import of C++ module 'NoUmbrella.A' appears within extern "C"}} } #endif diff --git a/clang/test/Modules/extern_c.cpp b/clang/test/Modules/extern_c.cpp index cc831bd2a089..aa39880c037a 100644 --- a/clang/test/Modules/extern_c.cpp +++ b/clang/test/Modules/extern_c.cpp @@ -42,7 +42,7 @@ extern "C++" { // expected-error-re@-3 {{import of module '{{c_library.inner|cxx_library}}' appears within namespace 'M'}} // expected-note@-21 {{namespace 'M' begins here}} #elif defined(EXTERN_C) && !defined(EXTERN_CXX) && defined(CXX_HEADER) && !defined(NO_EXTERN_C_ERROR) -// expected-error@-6 {{import of C++ module 'cxx_library' appears within extern "C" language linkage specification}} +// expected-warning@-6 {{import of C++ module 'cxx_library' appears within extern "C" language linkage specification}} // expected-note@-20 {{extern "C" language linkage specification begins here}} #endif diff --git a/clang/test/SemaHLSL/Texture2D-Gather.hlsl b/clang/test/SemaHLSL/Texture2D-Gather.hlsl new file mode 100644 index 000000000000..61b3c28a49e7 --- /dev/null +++ 
b/clang/test/SemaHLSL/Texture2D-Gather.hlsl @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -fsyntax-only -verify -finclude-default-header %s + +Texture2D<float4> Tex; +SamplerState Samp; +SamplerComparisonState SampCmp; + +void main() { + float2 uv = float2(0.5, 0.5); + int2 offset = int2(1, 1); + float compare = 0.5; + + // Gather + // Expected: Gather(SamplerState, float2, [int2]) + Tex.Gather(Samp, uv); + Tex.Gather(Samp, uv, offset); + + // Invalid Overloads + Tex.Gather(Samp); // expected-error {{no matching member function for call to 'Gather'}} + Tex.Gather(Samp, uv, offset, 1); // expected-error {{no matching member function for call to 'Gather'}} + + // Gather variants + Tex.GatherRed(Samp, uv); + Tex.GatherGreen(Samp, uv, offset); + Tex.GatherBlue(Samp, uv); + Tex.GatherAlpha(Samp, uv, offset); + + // GatherCmp + // Expected: GatherCmp(SamplerComparisonState, float2, float, [int2]) + Tex.GatherCmp(SampCmp, uv, compare); + Tex.GatherCmp(SampCmp, uv, compare, offset); + + // Invalid Overloads + Tex.GatherCmp(SampCmp, uv); // expected-error {{no matching member function for call to 'GatherCmp'}} + Tex.GatherCmp(SampCmp, uv, compare, offset, 1); // expected-error {{no matching member function for call to 'GatherCmp'}} + + // GatherCmp variants + Tex.GatherCmpRed(SampCmp, uv, compare); + Tex.GatherCmpGreen(SampCmp, uv, compare); + Tex.GatherCmpBlue(SampCmp, uv, compare, offset); + Tex.GatherCmpAlpha(SampCmp, uv, compare); + + // Type checks + // Offset must be int2 (SamplerState fails) + Tex.Gather(Samp, uv, Samp); // expected-error {{no matching member function for call to 'Gather'}} + + // Compare value must be scalar float + Tex.GatherCmp(SampCmp, uv, Samp); // expected-error {{no matching member function for call to 'GatherCmp'}} +} + +// expected-note@* 0+{{candidate function not viable}} diff --git a/clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl b/clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl new file mode 
100644 index 000000000000..117686b4ef9c --- /dev/null +++ b/clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -fsyntax-only -verify -finclude-default-header %s + +Texture2D<float4> Tex; +SamplerComparisonState SampCmp; + +void main() { + float2 uv = float2(0.5, 0.5); + float compare = 0.5; + + Tex.GatherCmp(SampCmp, uv, compare); + Tex.GatherCmpRed(SampCmp, uv, compare); + + // expected-error@* {{gatherCmpGreen operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed}} + Tex.GatherCmpGreen(SampCmp, uv, compare); + + // expected-error@* {{gatherCmpBlue operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed}} + Tex.GatherCmpBlue(SampCmp, uv, compare); + + // expected-error@* {{gatherCmpAlpha operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed}} + Tex.GatherCmpAlpha(SampCmp, uv, compare); +} + +// expected-note@* 0+{{in instantiation of member function}} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp index 9aba284145fc..45da8c71348b 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp @@ -1,6 +1,9 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 
-fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify %s // These tests validate appertainment for the sycl_kernel_entry_point attribute. @@ -37,6 +40,9 @@ struct coroutine_traits { // A unique kernel name type is required for each declared kernel entry point. template<int, int = 0> struct KN; +// A generic kernel launch function. +template<typename KNT, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} //////////////////////////////////////////////////////////////////////////////// // Valid declarations. @@ -131,6 +137,16 @@ struct S15 { static T ok15(); }; +struct S16 { + // Non-static member function declaration. + [[clang::sycl_kernel_entry_point(KN<16>)]] + void ok16(); +}; + +#if __cplusplus >= 202302L +auto ok17 = [] [[clang::sycl_kernel_entry_point(KN<17>)]] -> void {}; +#endif + //////////////////////////////////////////////////////////////////////////////// // Invalid declarations. @@ -163,13 +179,6 @@ struct B2 { static int bad2; }; -struct B3 { - // Non-static member function declaration. 
- // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} - [[clang::sycl_kernel_entry_point(BADKN<3>)]] - void bad3(); -}; - // expected-error@+1 {{'clang::sycl_kernel_entry_point' attribute only applies to functions}} namespace [[clang::sycl_kernel_entry_point(BADKN<4>)]] bad4 {} @@ -244,13 +253,13 @@ void bad19() { #endif struct B20 { - // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a constructor}} [[clang::sycl_kernel_entry_point(BADKN<20>)]] B20(); }; struct B21 { - // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a destructor}} [[clang::sycl_kernel_entry_point(BADKN<21>)]] ~B21(); }; @@ -338,11 +347,6 @@ struct B34 { }; #if __cplusplus >= 202302L -// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} -auto bad35 = [] [[clang::sycl_kernel_entry_point(BADKN<35>)]] -> void {}; -#endif - -#if __cplusplus >= 202302L // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}} auto bad36 = [] [[clang::sycl_kernel_entry_point(BADKN<36>)]] static {}; #endif @@ -373,3 +377,29 @@ struct B42 { // expected-warning@+1 {{declaration does not declare anything}} [[clang::sycl_kernel_entry_point(BADKN<42>)]]; }; + +#if __cplusplus >= 202302L +struct B43 { + // expected-error@+2 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}} + template<typename KNT> + [[clang::sycl_kernel_entry_point(KNT)]] + void bad43(this B43) {} +}; +#endif + +#if __cplusplus >= 202302L +struct B44 { + // 
expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}} + [[clang::sycl_kernel_entry_point(BADKN<44>)]] + void bad44(this B44); +}; +#endif + +#if __cplusplus >= 202302L +template<typename KNT> +struct B45 { + // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}} + [[clang::sycl_kernel_entry_point(KNT)]] + void bad45(this B45); +}; +#endif diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp new file mode 100644 index 000000000000..1aa48c739c04 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp @@ -0,0 +1,142 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsycl-is-host -verify=host %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsycl-is-device -verify=device %s + +// These tests validate that a diagnostic is issued if a function declared with +// the sycl_kernel_entry_point attribute is ODR-used from code that is emitted +// during device compilation. Such uses are ill-formed because such functions +// are used to define an offload kernel entry point function; they aren't +// available for ordinary function use. + +// host-no-diagnostics + +// Emulate inclusion of <typeinfo>. +namespace std { +struct type_info { + virtual ~type_info(); +}; +} // namespace std + +// A generic kernel launch function. +template<typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} + +// A kernel name type template. +template<int> struct KN; + +// SYCL kernel entry point functions. These are used to both trigger the +// emission of a function during device compilation (but not during host +// compilation) and to trigger a diagnostic if ODR-used from a function +// emitted during device compilation. 
+// device-note@+1 4 {{attribute is here}} +[[clang::sycl_kernel_entry_point(KN<1>)]] +void skep(); +struct SKL { + // device-note@+1 6 {{attribute is here}} + [[clang::sycl_kernel_entry_point(KN<2>)]] + void mskep(); + // device-note@+1 6 {{attribute is here}} + [[clang::sycl_kernel_entry_point(KN<3>)]] + static void smskep(); + // device-note@+1 2 {{attribute is here}} + [[clang::sycl_kernel_entry_point(KN<4>)]] + void operator()() const; +}; + +// A function that is emitted on the device due to usage reachable from a +// SYCL kernel entry point function. ODR-uses of sycl_kernel_entry_point +// attributed functions within this function require a diagnostic during +// device compilation. +void df() { + // Not ODR-uses; ok. + decltype(&skep) p1 = nullptr; + decltype(&SKL::mskep) p2 = nullptr; + decltype(&SKL::smskep) p3 = nullptr; + + // Not ODR-uses; ok. + (void)noexcept(skep()); + (void)noexcept(SKL{}.mskep()); + (void)noexcept(SKL::smskep()); + + // Not ODR-uses; ok. + (void)typeid(&skep); + (void)typeid(&SKL::mskep); + (void)typeid(&SKL::smskep); + + // device-error@+1 2 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + skep(); + // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL{}.mskep(); + // device-error@+1 2 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL::smskep(); + + // device-error@+1 2 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + (void)&skep; + // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + (void)&SKL::mskep; + // device-error@+1 2 {{function 'smskep' cannot be used in device code because it is declared with the 
'clang::sycl_kernel_entry_point' attribute}} + (void)&SKL::smskep; + + SKL sklo; + // device-error@+1 2 {{function 'operator()' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + sklo(); +} + +// device-note@+1 5 {{attribute is here}} +[[clang::sycl_kernel_entry_point(KN<1>)]] +void skep() { + // device-note@+1 {{called by 'skep'}} + df(); + // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + skep(); + // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL{}.mskep(); + // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL::smskep(); +} + +// device-note@+1 7 {{attribute is here}} +[[clang::sycl_kernel_entry_point(KN<2>)]] +void SKL::mskep() { + df(); + // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + skep(); + // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL{}.mskep(); + // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL::smskep(); +} + +// device-note@+1 3 {{attribute is here}} +[[clang::sycl_kernel_entry_point(KN<3>)]] +void SKL::smskep() { + df(); + // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + skep(); + // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL{}.mskep(); + // device-error@+1 {{function 'smskep' cannot be used in 
device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL::smskep(); +} + +[[clang::sycl_kernel_entry_point(KN<4>)]] +void SKL::operator()() const { + df(); + // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + skep(); + // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL{}.mskep(); + // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL::smskep(); +} + +[[clang::sycl_external]] +void sedf() { + // device-note@+1 {{called by 'sedf'}} + df(); + // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + skep(); + // device-error@+1 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL{}.mskep(); + // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}} + SKL::smskep(); +} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp index 8f81fa218c17..b1c9e270a02b 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp @@ -1,4 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s // These tests 
validate parsing of the sycl_kernel_entry_point argument list @@ -8,6 +10,9 @@ template<int> struct ST; // #ST-decl template<int N> using TTA = ST<N>; // #TTA-decl +// A generic kernel launch function. +template<typename KN, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} //////////////////////////////////////////////////////////////////////////////// // Valid declarations. diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp index 8788e147a2ae..05a660e91e82 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp @@ -17,6 +17,10 @@ module M2 { header "m2.h" } #--- common.h template<int> struct KN; +// A generic kernel launch function. +template<typename KN, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} + [[clang::sycl_kernel_entry_point(KN<1>)]] void common_test1() {} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp index 0575a7a5a67e..dcea60e016d1 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp @@ -15,6 +15,10 @@ #--- pch.h template<int> struct KN; +// A generic kernel launch function. +template<typename KN, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} + [[clang::sycl_kernel_entry_point(KN<1>)]] void pch_test1() {} // << expected previous declaration note here. 
@@ -26,11 +30,11 @@ template void pch_test2<KN<2>>(); #--- test.cpp // expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} -// expected-note@pch.h:4 {{previous declaration is here}} +// expected-note@pch.h:8 {{previous declaration is here}} [[clang::sycl_kernel_entry_point(KN<1>)]] void test1() {} // expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} -// expected-note@pch.h:8 {{previous declaration is here}} +// expected-note@pch.h:12 {{previous declaration is here}} [[clang::sycl_kernel_entry_point(KN<2>)]] void test2() {} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp index c7b83932fefe..2abb24cde666 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp @@ -1,4 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s // These tests validate that the kernel name type argument provided to the @@ -7,6 +9,11 @@ // specification. struct S1; + +// A generic kernel launch function. +template<typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) 
{} + // expected-warning@+3 {{redundant 'clang::sycl_kernel_entry_point' attribute}} // expected-note@+1 {{previous attribute is here}} [[clang::sycl_kernel_entry_point(S1), diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp index 4c6157041962..b39a77bd3587 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp @@ -1,4 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s // These tests are intended to validate that a sycl_kernel_entry_point attribute @@ -8,6 +10,10 @@ // attribute during instantiation of a specialization unless that specialization // is selected by overload resolution. +// A generic kernel launch function. +template<typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} + // FIXME: C++23 [temp.expl.spec]p12 states: // FIXME: ... Similarly, attributes appearing in the declaration of a template // FIXME: have no effect on an explicit specialization of that template. 
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp new file mode 100644 index 000000000000..2112733b41fc --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++17 -fsycl-is-host -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++17 -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++20 -fsycl-is-host -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++20 -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++23 -fsycl-is-host -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++23 -fsycl-is-device -verify %s + +// These tests validate diagnostics for invalid use of 'this' in the body of +// a function declared with the sycl_kernel_entry_point attribute. + + +template<typename T> struct remove_reference_t { + using type = T; +}; +template<typename T> struct remove_reference_t<T&> { + using type = T; +}; + +namespace std { +struct type_info { + virtual ~type_info(); +}; +} // namespace std + +// A generic kernel launch function. +template<typename KernelName, typename... Ts> +void sycl_kernel_launch(const char *, Ts...) {} + +//////////////////////////////////////////////////////////////////////////////// +// Valid declarations. 
+//////////////////////////////////////////////////////////////////////////////// +template<int, int=0> struct KN; + +struct S1 { + [[clang::sycl_kernel_entry_point(KN<1>)]] void ok1() { + (void)sizeof(this); + } +}; + +struct S2 { + [[clang::sycl_kernel_entry_point(KN<2>)]] void ok2() { + (void)noexcept(this); + } +}; + +struct S3 { + [[clang::sycl_kernel_entry_point(KN<3>)]] void ok3() { + decltype(this) x = nullptr; + } +}; + +struct S4 { + static void smf(); + [[clang::sycl_kernel_entry_point(KN<4>)]] void ok4() { + remove_reference_t<decltype(*this)>::type::smf(); + } +}; + +struct S5 { + int dm; + void mf(); + [[clang::sycl_kernel_entry_point(KN<5>)]] void ok5() { + (void)typeid(*this); // S5 is not abstract, so 'this' is not evaluated. + (void)typeid(dm); // 'int' is not an abstract class type; implicit 'this' is not evaluated. + (void)typeid(mf()); // 'void' is not an abstract class type; implicit 'this' is not evaluated. + } +}; + +template<typename KN, bool B> +struct S6 { + void mf() noexcept(B); + [[clang::sycl_kernel_entry_point(KN)]] void ok6() noexcept(noexcept(mf())) {} +}; +template void S6<KN<6,0>, false>::ok6(); +template void S6<KN<6,1>, true>::ok6(); + +template<typename KN, bool B> +struct S7 { + void mf() noexcept(B); + [[clang::sycl_kernel_entry_point(KN)]] void ok7() noexcept(noexcept(this->mf())) {} +}; +template void S7<KN<7,0>, false>::ok7(); +template void S7<KN<7,1>, true>::ok7(); + +#if __cplusplus >= 202002L +template<typename KN, typename T> +struct S8 { + void mf(T); + [[clang::sycl_kernel_entry_point(KN)]] void ok8() requires(requires { mf(1); }) {} +}; +template void S8<KN<8>, int>::ok8(); + +template<typename KN, typename T> +struct S9 { + void mf(T); + [[clang::sycl_kernel_entry_point(KN)]] void ok9() requires(requires { this->mf(1); }) {} +}; +template void S9<KN<9>, int>::ok9(); +#endif + + +//////////////////////////////////////////////////////////////////////////////// +// Invalid declarations. 
+//////////////////////////////////////////////////////////////////////////////// + +template<int, int=0> struct BADKN; + +// expected-error@+3 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B1 { + [[clang::sycl_kernel_entry_point(BADKN<1>)]] void bad1() { + (void)this; + } +}; + +// expected-error@+4 {{'this' cannot be implicitly used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B2 { + int dm; + [[clang::sycl_kernel_entry_point(BADKN<2>)]] void bad2() { + (void)dm; + } +}; + +// expected-error@+4 {{'this' cannot be implicitly used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B3 { + void mf(); + [[clang::sycl_kernel_entry_point(BADKN<3>)]] void bad3() { + (void)mf(); + } +}; + +// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B4 { + virtual void vmf() = 0; + [[clang::sycl_kernel_entry_point(BADKN<4>)]] void bad4() { + (void)typeid(*this); // B4 is abstract, so 'this' is evaluated. + } +}; + +// A diagnostic is not currently issued for uninstantiated definitions. In this +// case, a declaration is instantiated, but a definition isn't. A diagnostic +// will be issued if a definition is instantiated (as the next test exercises). 
+struct B5 { + template<typename KN> + [[clang::sycl_kernel_entry_point(KN)]] void bad5() { + (void)this; + } +}; +extern template void B5::bad5<BADKN<5>>(); + +// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B6 { + template<typename KN> + [[clang::sycl_kernel_entry_point(KN)]] void bad6() { + (void)this; + } +}; +// expected-note@+1 {{in instantiation of function template specialization 'B6::bad6<BADKN<6>>' requested here}} +template void B6::bad6<BADKN<6>>(); + +// A diagnostic is not currently issued for uninstantiated definitions. In this +// case, a declaration is instantiated, but a definition isn't. A diagnostic +// will be issued if a definition is instantiated (as the next test exercises). +template<typename KN> +struct B7 { + [[clang::sycl_kernel_entry_point(KN)]] void bad7() { + (void)this; + } +}; +extern template void B7<BADKN<7>>::bad7(); + +// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +template<typename KN> +struct B8 { + [[clang::sycl_kernel_entry_point(KN)]] void bad8() { + (void)this; + } +}; +// expected-note@+1 {{in instantiation of member function 'B8<BADKN<8>>::bad8' requested here}} +template void B8<BADKN<8>>::bad8(); + +#if __cplusplus >= 202302L +struct B9 { + // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}} + [[clang::sycl_kernel_entry_point(BADKN<9>)]] void bad9(this B9 self) { + (void)self; + } +}; +#endif diff --git a/clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp b/clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp new file mode 100644 index 000000000000..cd186a833b02 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -triple 
x86_64-pc-windows-msvc -std=c++20 -fsyntax-only -fsycl-is-host -fms-compatibility -fcxx-exceptions -verify=host,expected %s +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++20 -fsyntax-only -fsycl-is-device -fms-compatibility -verify=device,expected %s + +// Test Microsoft extensions for lookup of a sycl_kernel_launch member template +// in a dependent base class. + + +//////////////////////////////////////////////////////////////////////////////// +// Valid declarations. +//////////////////////////////////////////////////////////////////////////////// + +// A unique kernel name type is required for each declared kernel entry point. +template<int> struct KN; + +// A generic kernel object type. +template<int> +struct KT { + void operator()() const; +}; + + +namespace ok1 { + template<typename Derived> + struct base_handler { + protected: + // expected-note@+2 {{must qualify identifier to find this declaration in dependent base class}} + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + }; + template<int N> + struct handler : protected base_handler<handler<N>> { + // A warning is issued because, in standard C++, unqualified lookup for + // sycl_kernel_launch would not consider dependent base classes. Such + // lookups are allowed as a Microsoft compatible extension. 
+ // expected-warning@+4 {{use of member 'sycl_kernel_launch' found via unqualified lookup into dependent bases of class templates is a Microsoft extension}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'KN<1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'KT<1>') required here}} + [[clang::sycl_kernel_entry_point(KN<1>)]] + void skep(KT<1> k) { + k(); + } + }; + // expected-note@+1 {{in instantiation of member function 'ok1::handler<1>::skep' requested here}} + template void handler<1>::skep(KT<1>); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Invalid declarations. +//////////////////////////////////////////////////////////////////////////////// + +// A unique kernel name type is required for each declared kernel entry point. +template<int> struct BADKN; + +// A generic kernel object type. +template<int> +struct BADKT { + void operator()() const; +}; + + +namespace bad1 { + template<typename Derived> + struct base_handler { + private: + // expected-note@+3 {{must qualify identifier to find this declaration in dependent base class}} + // expected-note@+2 {{declared private here}} + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + }; + template<int N> + struct handler : protected base_handler<handler<N>> { + // In standard C++, unqualified lookup for sycl_kernel_launch would not + // consider dependent base classes. Such lookups are allowed as a Microsoft + // compatible extension, but access checks are still performed which makes + // this case an error. 
+ // expected-warning@+5 {{use of member 'sycl_kernel_launch' found via unqualified lookup into dependent bases of class templates is a Microsoft extension}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<1>') required here}} + // expected-error@+2 {{'sycl_kernel_launch' is a private member of 'bad1::base_handler<bad1::handler<1>>'}} + [[clang::sycl_kernel_entry_point(BADKN<1>)]] + void skep(BADKT<1> k) { + k(); + } + }; + // expected-note@+1 {{in instantiation of member function 'bad1::handler<1>::skep' requested here}} + template void handler<1>::skep(BADKT<1>); +} diff --git a/clang/test/SemaSYCL/sycl-kernel-launch.cpp b/clang/test/SemaSYCL/sycl-kernel-launch.cpp new file mode 100644 index 000000000000..20d9becb8192 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-launch.cpp @@ -0,0 +1,560 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s + +// Test overload resolution for implicit calls to sycl_kernel_launch<KN>(...) +// synthesized for functions declared with the sycl_kernel_entry_point +// attribute. 
+ + +//////////////////////////////////////////////////////////////////////////////// +// Valid declarations. +//////////////////////////////////////////////////////////////////////////////// + +// A unique kernel name type is required for each declared kernel entry point. +template<int, int = 0> struct KN; + +// A generic kernel object type. +template<int, int = 0> +struct KT { + void operator()() const; +}; + + +// sycl_kernel_launch as function template at namespace scope. +namespace ok1 { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + [[clang::sycl_kernel_entry_point(KN<1>)]] + void skep(KT<1> k) { + k(); + } +} + +// sycl_kernel_launch as function template at namespace scope with default +// template arguments and default function arguments.. +namespace ok2 { + template<typename KN, typename T = int> + void sycl_kernel_launch(const char *, KT<2>, T = 2); + [[clang::sycl_kernel_entry_point(KN<2>)]] + void skep(KT<2> k) { + k(); + } +} + +// sycl_kernel_launch as overload set. +namespace ok3 { + template<typename KN> + void sycl_kernel_launch(const char *); + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + [[clang::sycl_kernel_entry_point(KN<3>)]] + void skep(KT<3> k) { + k(); + } +} + +// sycl_kernel_launch as static member function template. +namespace ok4 { + struct handler { + private: + template<typename KN, typename... Ts> + static void sycl_kernel_launch(const char *, Ts...); + public: + [[clang::sycl_kernel_entry_point(KN<4,0>)]] + static void skep(KT<4,0> k) { + k(); + } + [[clang::sycl_kernel_entry_point(KN<4,1>)]] + void skep(KT<4,1> k) { + k(); + } + }; +} + +// sycl_kernel_launch as non-static member function template. +namespace ok5 { + struct handler { + private: + template<typename KN, typename... 
Ts> + void sycl_kernel_launch(const char *, Ts...); + public: + [[clang::sycl_kernel_entry_point(KN<5>)]] + void skep(KT<5> k) { + k(); + } + }; +} + +#if __cplusplus >= 202302L +// sycl_kernel_launch as non-static member function template with explicit +// object parameter. +namespace ok6 { + struct handler { + private: + template<typename KN, typename... Ts> + void sycl_kernel_launch(this handler self, const char *, Ts...); + public: + [[clang::sycl_kernel_entry_point(KN<6>)]] + void skep(KT<6> k) { + k(); + } + }; +} +#endif + +// sycl_kernel_launch as variable template. +namespace ok7 { + template<typename KN> + struct launcher { + template<typename... Ts> + void operator()(const char *, Ts...); + }; + template<typename KN> + launcher<KN> sycl_kernel_launch; + [[clang::sycl_kernel_entry_point(KN<7>)]] + void skep(KT<7> k) { + k(); + } +} + +#if __cplusplus >= 202302L +// sycl_kernel_launch as variable template with static call operator template. +namespace ok8 { + template<typename KN> + struct launcher { + template<typename... Ts> + static void operator()(const char *, Ts...); + }; + template<typename KN> + launcher<KN> sycl_kernel_launch; + [[clang::sycl_kernel_entry_point(KN<8>)]] + void skep(KT<8> k) { + k(); + } +} +#endif + +#if __cplusplus >= 202302L +// sycl_kernel_launch as variable template with call operator template with +// explicit object parameter. +namespace ok9 { + template<typename KN> + struct launcher { + template<typename... Ts> + void operator()(this launcher self, const char *, Ts...); + }; + template<typename KN> + launcher<KN> sycl_kernel_launch; + [[clang::sycl_kernel_entry_point(KN<9>)]] + void skep(KT<9> k) { + k(); + } +} +#endif + +// sycl_kernel_launch as base class non-static member function template. +namespace ok10 { + template<typename Derived> + struct base_handler { + protected: + template<typename KN, typename... 
Ts> + void sycl_kernel_launch(const char *, Ts...); + }; + struct handler : protected base_handler<handler> { + public: + [[clang::sycl_kernel_entry_point(KN<10>)]] + void skep(KT<10> k) { + k(); + } + }; +} + +// sycl_kernel_launch with non-reference parameters. +namespace ok11 { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + struct move_only { + move_only(move_only&&) = default; + }; + [[clang::sycl_kernel_entry_point(KN<11>)]] + void skep(KT<11> k, move_only) { + k(); + } +} + +// sycl_kernel_launch with forward reference parameters. +namespace ok12 { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts &&...); + struct non_copyable { + non_copyable(const non_copyable&) = delete; + }; + struct non_moveable { + non_moveable(non_moveable&&) = delete; + }; + struct move_only { + move_only(move_only&&) = default; + }; + [[clang::sycl_kernel_entry_point(KN<12>)]] + void skep(KT<12> k, non_copyable, non_moveable, move_only) { + k(); + } +} + +// ADL for sycl_kernel_launch. +namespace ok13 { + template<typename KN, typename KT, typename T> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k, T t) { + k(); + } + namespace nested { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + struct S13 {}; + } + template void skep<KN<13>>(KT<13>, nested::S13); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Invalid declarations. +//////////////////////////////////////////////////////////////////////////////// + +// A unique kernel name type is required for each declared kernel entry point. +template<int, int = 0> struct BADKN; + +// A generic kernel object type. +template<int, int = 0> +struct BADKT { + void operator()() const; +}; + + +// Undeclared sycl_kernel_launch identifier from non-template function. 
+namespace bad1 { + // expected-error@+4 {{use of undeclared identifier 'sycl_kernel_launch'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<1>') required here}} + [[clang::sycl_kernel_entry_point(BADKN<1>)]] + void skep(BADKT<1> k) { + k(); + } +} + +// Undeclared sycl_kernel_launch identifier from function template. +namespace bad2 { + // expected-error@+5 {{use of undeclared identifier 'sycl_kernel_launch'}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<2>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<2>') required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k) { + k(); + } + // expected-note@+1 {{in instantiation of function template specialization 'bad2::skep<BADKN<2>, BADKT<2>>' requested here}} + template void skep<BADKN<2>>(BADKT<2>); +} + +// No matching function for call to sycl_kernel_launch; not a template. 
+namespace bad3 { + // expected-note@+1 {{declared as a non-template here}} + void sycl_kernel_launch(const char *, BADKT<3>); + // expected-error@+4 {{'sycl_kernel_launch' does not refer to a template}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<3>' required here}} + [[clang::sycl_kernel_entry_point(BADKN<3>)]] + void skep(BADKT<3> k) { + k(); + } +} + +// No matching function for call to sycl_kernel_launch; not enough arguments. +namespace bad4 { + // expected-note@+2 {{candidate function template not viable: requires 2 arguments, but 1 was provided}} + template<typename KN, typename KT> + void sycl_kernel_launch(const char *, KT); + // expected-error@+5 {{no matching function for call to 'sycl_kernel_launch'}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<4>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]') required here}} + template<typename KN> + [[clang::sycl_kernel_entry_point(KN)]] + void skep() {} + // expected-note@+1 {{in instantiation of function template specialization 'bad4::skep<BADKN<4>>' requested here}} + template void skep<BADKN<4>>(); +} + +// No matching function for call to sycl_kernel_launch; too many arguments. 
+namespace bad5 { + // expected-note@+2 {{candidate function template not viable: requires 2 arguments, but 3 were provided}} + template<typename KN, typename KT> + void sycl_kernel_launch(const char *, KT); + // expected-error@+5 {{no matching function for call to 'sycl_kernel_launch'}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<5>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<5>', xvalue of type 'int') required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k, int i) { + k(); + } + // expected-note@+1 {{in instantiation of function template specialization 'bad5::skep<BADKN<5>, BADKT<5>>' requested here}} + template void skep<BADKN<5>>(BADKT<5>, int); +} + +// No matching function for call to sycl_kernel_launch; mismatched function parameter type. +namespace bad6 { + // expected-note-re@+2 {{candidate function template not viable: no known conversion from 'const char[{{[0-9]*}}]' to 'int' for 1st argument}} + template<typename KN, typename... 
Ts> + void sycl_kernel_launch(int, Ts...); + // expected-error@+5 {{no matching function for call to 'sycl_kernel_launch'}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<6>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<6>') required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k) { + k(); + } + // expected-note@+1 {{in instantiation of function template specialization 'bad6::skep<BADKN<6>, BADKT<6>>' requested here}} + template void skep<BADKN<6>>(BADKT<6>); +} + +// No matching function for call to sycl_kernel_launch; mismatched template parameter kind. +namespace bad7 { + // expected-note@+2 {{candidate template ignored: invalid explicitly-specified argument for 1st template parameter}} + template<int, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + // expected-error@+4 {{no matching function for call to 'sycl_kernel_launch'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<7>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<7>') required here}} + [[clang::sycl_kernel_entry_point(BADKN<7>)]] + void skep(BADKT<7> k) { + k(); + } +} + +// No matching function for call to sycl_kernel_launch; substitution failure. 
+namespace bad8 { + // expected-note@+2 {{candidate template ignored: substitution failure [with KN = BADKN<8>, KT = BADKT<8>]: no type named 'no_such_type' in 'BADKT<8>'}} + template<typename KN, typename KT, typename T = typename KT::no_such_type> + void sycl_kernel_launch(const char *, KT); + // expected-error@+4 {{no matching function for call to 'sycl_kernel_launch'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<8>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<8>') required here}} + [[clang::sycl_kernel_entry_point(BADKN<8>)]] + void skep(BADKT<8> k) { + k(); + } +} + +// No matching function for call to sycl_kernel_launch; deduction failure. +namespace bad9 { + // expected-note@+2 {{candidate template ignored: couldn't infer template argument 'T'}} + template<typename KN, typename KT, typename T> + void sycl_kernel_launch(const char *, KT); + // expected-error@+4 {{no matching function for call to 'sycl_kernel_launch'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<9>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<9>') required here}} + [[clang::sycl_kernel_entry_point(BADKN<9>)]] + void skep(BADKT<9> k) { + k(); + } +} + +// No matching function for call to sycl_kernel_launch object; mismatched function parameter type. +namespace bad10 { + template<typename KN> + struct launcher { + // expected-note-re@+2 {{candidate function template not viable: no known conversion from 'const char[{{[0-9]*}}]' to 'int' for 1st argument}} + template<typename... 
Ts> + void operator()(int, Ts...); + }; + template<typename KN> + launcher<KN> sycl_kernel_launch; + // expected-error@+5 {{no matching function for call to object of type 'launcher<BADKN<10, 0>>'}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<10>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<10>') required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k) { + k(); + } + // expected-note@+1 {{in instantiation of function template specialization 'bad10::skep<BADKN<10>, BADKT<10>>' requested here}} + template void skep<BADKN<10>>(BADKT<10>); +} + +// No matching function for call to sycl_kernel_launch object; mismatched template parameter kind. +namespace bad11 { + template<int KN> + struct launcher { + template<typename... Ts> + void operator()(int, Ts...); + }; + // expected-note@+1 {{template parameter is declared here}} + template<int KN> + launcher<KN> sycl_kernel_launch; + // expected-error@+5 {{template argument for non-type template parameter must be an expression}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'KN' required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k) { + k(); + } + template void skep<BADKN<11>>(BADKT<11>); +} + +// sycl_kernel_launch as variable template with private call operator template. +namespace bad12 { + template<typename KN> + struct launcher { + private: + // expected-note@+2 {{declared private here}} + template<typename... 
Ts> + void operator()(const char *, Ts...); + }; + template<typename KN> + launcher<KN> sycl_kernel_launch; + // expected-error@+4 {{'operator()' is a private member of 'bad12::launcher<BADKN<12>>'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<12>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<12>') required here}} + [[clang::sycl_kernel_entry_point(BADKN<12>)]] + void skep(BADKT<12> k) { + k(); + } +} + +// Ambiguous reference to sycl_kernel_launch. +namespace bad13 { + inline namespace in1 { + // expected-note@+2 {{candidate found by name lookup is 'bad13::in1::sycl_kernel_launch'}} + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + } + inline namespace in2 { + template<typename KN> + struct launcher { + template<typename KT, typename... Ts> + void operator()(const char *, Ts...); + }; + // expected-note@+2 {{candidate found by name lookup is 'bad13::in2::sycl_kernel_launch'}} + template<typename KN> + launcher<KN> sycl_kernel_launch; + } + // expected-error@+5 {{reference to 'sycl_kernel_launch' is ambiguous}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'KN' required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k) { + k(); + } + template void skep<BADKN<13>>(BADKT<13>); +} + +// Ambiguous call to sycl_kernel_launch. 
+namespace bad14 { + // expected-note@+2 {{candidate function [with KN = BADKN<14>, KT = BADKT<14>]}} + template<typename KN, typename KT> + void sycl_kernel_launch(const char *, KT, signed char); + // expected-note@+2 {{candidate function [with KN = BADKN<14>, KT = BADKT<14>]}} + template<typename KN, typename KT> + void sycl_kernel_launch(const char *, KT, unsigned char); + // expected-error@+4 {{call to 'sycl_kernel_launch' is ambiguous}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<14>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<14>', xvalue of type 'int') required here}} + [[clang::sycl_kernel_entry_point(BADKN<14>)]] + void skep(BADKT<14> k, int i) { + k(); + } +} + +// Call to member sycl_kernel_launch from non-static member. +namespace bad15 { + struct S { + template<typename KN, typename... Ts> + void sycl_kernel_launch(const char *, Ts...); + // expected-error@+4 {{call to non-static member function without an object argument}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<15>' required here}} + [[clang::sycl_kernel_entry_point(BADKN<15>)]] + static void skep(BADKT<15> k) { + k(); + } + }; +} + +// sycl_kernel_launch as dependent base class non-static member function +// template. +namespace bad16 { + template<typename Derived> + struct base_handler { + protected: + // expected-note@+2 {{member is declared here}} + template<typename KN, typename... 
Ts> + void sycl_kernel_launch(const char *, Ts...); + }; + template<int N> + struct handler : protected base_handler<handler<N>> { + // Lookup for sycl_kernel_launch fails because lookup in dependent base + // classes requires explicit qualification. + // expected-error@+4 {{explicit qualification required to use member 'sycl_kernel_launch' from dependent base class}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<16>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<16>') required here}} + [[clang::sycl_kernel_entry_point(BADKN<16>)]] + void skep(BADKT<16> k) { + k(); + } + }; + // expected-note@+1 {{in instantiation of member function 'bad16::handler<16>::skep' requested here}} + template void handler<16>::skep(BADKT<16>); +} + +// sycl_kernel_launch with non-reference parameters and non-moveable arguments. +namespace bad17 { + // expected-note@+2 2 {{passing argument to parameter here}} + template<typename KN, typename... 
Ts> + void sycl_kernel_launch(const char *, Ts...); + struct non_copyable { + // expected-note@+1 {{'non_copyable' has been explicitly marked deleted here}} + non_copyable(const non_copyable&) = delete; + }; + // expected-error@+4 {{call to deleted constructor of 'bad17::non_copyable'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<17, 0>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<17, 0>', xvalue of type 'non_copyable') required here}} + [[clang::sycl_kernel_entry_point(BADKN<17,0>)]] + void skep(BADKT<17,0> k, non_copyable) { + k(); + } + struct non_moveable { + // expected-note@+1 {{'non_moveable' has been explicitly marked deleted here}} + non_moveable(non_moveable&&) = delete; + }; + // expected-error@+4 {{call to deleted constructor of 'bad17::non_moveable'}} + // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<17, 1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<17, 1>', xvalue of type 'non_moveable') required here}} + [[clang::sycl_kernel_entry_point(BADKN<17,1>)]] + void skep(BADKT<17,1> k, non_moveable) { + k(); + } +} + +// sycl_kernel_launch declared after use and not found by ADL. 
+namespace bad18 { + // expected-error@+5 {{call to function 'sycl_kernel_launch' that is neither visible in the template definition nor found by argument-dependent lookup}} + // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}} + // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<18>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<18>') required here}} + template<typename KN, typename KT> + [[clang::sycl_kernel_entry_point(KN)]] + void skep(KT k) { + k(); + } + // expected-note@+2 {{'sycl_kernel_launch' should be declared prior to the call site or in the global namespace}} + template<typename KN, typename... Ts> + void sycl_kernel_launch(Ts...) {} + // expected-note@+1 {{in instantiation of function template specialization 'bad18::skep<BADKN<18>, BADKT<18>>' requested here}} + template void skep<BADKN<18>>(BADKT<18>); +} diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 17f485e5c78a..d31d2c0c9bb6 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -383,6 +383,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, break; case Stmt::SYCLKernelCallStmtClass: + case Stmt::UnresolvedSYCLKernelCallStmtClass: K = CXCursor_UnexposedStmt; break; diff --git a/flang-rt/lib/runtime/execute.cpp b/flang-rt/lib/runtime/execute.cpp index 8da7069f5700..b843a0c7f463 100644 --- a/flang-rt/lib/runtime/execute.cpp +++ b/flang-rt/lib/runtime/execute.cpp @@ -111,7 +111,17 @@ std::int64_t TerminationCheck(std::int64_t status, const Descriptor *cmdstat, // On WIN32 API std::system() returns exit status directly. On other OS'es, // special status codes are handled below. 
std::int64_t exitStatusVal{status}; -#ifndef _WIN32 +#ifdef _WIN32 + if (status == 9009) { + // cmd.exe returns status code 9009 for "command not found" error + if (!cmdstat) { + terminator.Crash("Command not found."); + } else { + StoreIntToDescriptor(cmdstat, COMMAND_NOT_FOUND_ERR, terminator); + CheckAndCopyCharsToDescriptor(cmdmsg, "Command not found."); + } + } +#else #if defined(WIFSIGNALED) && defined(WTERMSIG) if (WIFSIGNALED(status)) { @@ -195,9 +205,36 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait, RUNTIME_CHECK(terminator, IsValidCharDescriptor(cmdmsg)); } + const char *cmd{newCmd}; +#ifdef _WIN32 + // Construct a string that looks like + // "cmd.exe /v:on /c \"mycommand & exit /b !ERRORLEVEL!\"" + // Explanantion: + // /v:on - turns delayed environment variable expansion on, so + // variables written as !VAR! are expanded at execution time + // instead of at parse time. This is required for !ERRORLEVEL! + // to reflect the current error code at the moment exit runs. + // exit /b !ERRORLEVEL! - exits the current cmd instance (/b) and + // sets its process exit code to the current ERRORLEVEL value. + // Because delayed expansion is on, !ERRORLEVEL! is evaluated at + // execution time, so this cmd instance returns the same error + // code as mycommand. + // This allows cmd.exe to either return the exit code of mycommand, or + // to return its own exit code to the caller. The code 9009 is used + // by cmd.exe to indicate "not found" condition. 
+ const char prefix[]{"cmd.exe /v:on /c \""}; + const char suffix[]{" & exit /b !ERRORLEVEL!\""}; + const size_t newCmdWinLen{ + (sizeof(prefix) - 1) + std::strlen(newCmd) + (sizeof(suffix) - 1) + 1}; + char *newCmdWin{ + static_cast<char *>(AllocateMemoryOrCrash(terminator, newCmdWinLen))}; + std::snprintf(newCmdWin, newCmdWinLen, "%s%s%s", prefix, newCmd, suffix); + cmd = newCmdWin; +#endif + if (wait) { // either wait is not specified or wait is true: synchronous mode - std::int64_t status{std::system(newCmd)}; + std::int64_t status{std::system(cmd)}; std::int64_t exitStatusVal{ TerminationCheck(status, cmdstat, cmdmsg, terminator)}; // If sync, assigned processor-dependent exit status. Otherwise unchanged @@ -211,13 +248,6 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait, si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - // add "cmd.exe /c " to the beginning of command - const char *prefix{"cmd.exe /c "}; - char *newCmdWin{static_cast<char *>(AllocateMemoryOrCrash( - terminator, std::strlen(prefix) + std::strlen(newCmd) + 1))}; - std::strcpy(newCmdWin, prefix); - std::strcat(newCmdWin, newCmd); - // Convert the char to wide char const size_t sizeNeeded{mbstowcs(NULL, newCmdWin, 0) + 1}; wchar_t *wcmd{static_cast<wchar_t *>( @@ -225,7 +255,6 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait, if (std::mbstowcs(wcmd, newCmdWin, sizeNeeded) == static_cast<size_t>(-1)) { terminator.Crash("Char to wide char failed for newCmd"); } - FreeMemory(newCmdWin); if (CreateProcessW(nullptr, wcmd, nullptr, nullptr, FALSE, 0, nullptr, nullptr, &si, &pi)) { @@ -278,6 +307,11 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait, } #endif } + +#ifdef _WIN32 + FreeMemory(newCmdWin); +#endif + // Deallocate memory if EnsureNullTerminated dynamically allocated memory if (newCmd != command.OffsetElement()) { FreeMemory(newCmd); diff --git a/flang-rt/unittests/Runtime/CommandTest.cpp 
b/flang-rt/unittests/Runtime/CommandTest.cpp index 4509c9a34c79..3bed8acafc59 100644 --- a/flang-rt/unittests/Runtime/CommandTest.cpp +++ b/flang-rt/unittests/Runtime/CommandTest.cpp @@ -365,9 +365,9 @@ TEST_F(ZeroArguments, ECLNotExecutedCommandErrorSync) { RTNAME(ExecuteCommandLine) (*command.get(), wait, exitStat.get(), cmdStat.get(), cmdMsg.get()); #ifdef _WIN32 - CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 1); - CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 0); - CheckDescriptorEqStr(cmdMsg.get(), "cmd msg buffer XXXXXXXX"); + CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 9009); + CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 5); + CheckDescriptorEqStr(cmdMsg.get(), "Command not found."); #else CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 126); CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 4); @@ -394,9 +394,9 @@ TEST_F(ZeroArguments, ECLNotFoundCommandErrorSync) { RTNAME(ExecuteCommandLine) (*command.get(), wait, exitStat.get(), cmdStat.get(), cmdMsg.get()); #ifdef _WIN32 - CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 1); - CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 0); - CheckDescriptorEqStr(cmdMsg.get(), "unmodified buffer XXXXXXXXX"); + CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 9009); + CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 5); + CheckDescriptorEqStr(cmdMsg.get(), "Command not found."); #else CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 127); CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 5); @@ -412,7 +412,7 @@ TEST_F(ZeroArguments, ECLInvalidCommandTerminatedSync) { #ifdef _WIN32 EXPECT_DEATH(RTNAME(ExecuteCommandLine)( *command.get(), wait, nullptr, nullptr, cmdMsg.get()), - "Invalid command quit with exit status code: 1"); + "Command not found."); #else EXPECT_DEATH(RTNAME(ExecuteCommandLine)( *command.get(), wait, nullptr, nullptr, cmdMsg.get()), @@ -490,7 +490,7 @@ TEST_F(ZeroArguments, SystemInvalidCommandExitStat) { RTNAME(ExecuteCommandLine) (*command.get(), wait, 
exitStat.get(), cmdStat.get(), nullptr); #ifdef _WIN32 - CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 1); + CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 9009); #else CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 127); #endif diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 028e3ea80623..a997980ca18e 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -484,6 +484,18 @@ end * A pointer component that has no default initialization or explicit value in a structure constructor is defaulted to `NULL()`. * An assumed-rank entity is an acceptable `NAMELIST` group item. +* A named constant (`PARAMETER`) may appear as a `namelist-group-object` in a + `NAMELIST` statement. The Fortran standard requires namelist group objects + to be variables, but this usage is accepted by Flang as an extension. + When `-pedantic` is enabled, Flang emits a warning for this case. + For example: +``` +program p + implicit none + integer, parameter :: k = 3 + namelist /g/ k +end program +``` ### Extensions supported when enabled by options diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index b8fb6078d59a..f23e52585d56 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -226,6 +226,9 @@ const T *GetFirstArgument(const OmpDirectiveSpecification &spec) { return nullptr; } +const OmpClause *FindClause( + const OmpDirectiveSpecification &spec, llvm::omp::Clause clauseId); + const BlockConstruct *GetFortranBlockConstruct( const ExecutionPartConstruct &epc); const Block &GetInnermostExecPart(const Block &block); diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h index e5cf915e9f78..cbcb3592f04c 100644 --- a/flang/include/flang/Support/Fortran-features.h +++ b/flang/include/flang/Support/Fortran-features.h @@ -82,7 +82,7 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable, 
HostAssociatedIntentOutInSpecExpr, NonVolatilePointerToVolatile, RealConstantWidening, VolatileOrAsynchronousTemporary, UnusedVariable, UsedUndefinedVariable, BadValueInDeadCode, AssumedTypeSizeDummy, - MisplacedIgnoreTKR) + MisplacedIgnoreTKR, NamelistParameter) using LanguageFeatures = EnumSet<LanguageFeature, LanguageFeature_enumSize>; using UsageWarnings = EnumSet<UsageWarning, UsageWarning_enumSize>; diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e9ba5f386803..394c7a485525 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -786,13 +786,9 @@ static void processTileSizesFromOpenMPConstruct( innerConstruct->BeginDir(); if (innerBeginSpec.DirId() == llvm::omp::Directive::OMPD_tile) { // Get the size values from parse tree and convert to a vector. - for (const auto &clause : innerBeginSpec.Clauses().v) { - if (const auto tclause{ - std::get_if<parser::OmpClause::Sizes>(&clause.u)}) { - processFun(tclause); - break; - } - } + if (auto *clause = parser::omp::FindClause( + innerBeginSpec, llvm::omp::Clause::OMPC_sizes)) + processFun(&std::get<parser::OmpClause::Sizes>(clause->u)); } } } diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp index d7fadbc84ff1..a994f30a6dd7 100644 --- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp @@ -77,6 +77,8 @@ void registerOpenACCExtensions(mlir::DialectRegistry ®istry) { *ctx); fir::FieldIndexOp::attachInterface< OutlineRematerializationModel<fir::FieldIndexOp>>(*ctx); + fir::ConvertOp::attachInterface< + OutlineRematerializationModel<fir::ConvertOp>>(*ctx); }); // Register HLFIR operation interfaces diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index c81f48f6323d..6d4326af7834 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ 
b/flang/lib/Parser/openmp-utils.cpp @@ -145,6 +145,16 @@ const OmpObjectList *GetOmpObjectList(const OmpDependClause::TaskDep &x) { return &std::get<OmpObjectList>(x.t); } +const OmpClause *FindClause( + const OmpDirectiveSpecification &spec, llvm::omp::Clause clauseId) { + for (auto &clause : spec.Clauses().v) { + if (clause.Id() == clauseId) { + return &clause; + } + } + return nullptr; +} + const BlockConstruct *GetFortranBlockConstruct( const ExecutionPartConstruct &epc) { // ExecutionPartConstruct -> ExecutableConstruct diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp index afe28182f862..5bdfa47bea1c 100644 --- a/flang/lib/Parser/parse-tree.cpp +++ b/flang/lib/Parser/parse-tree.cpp @@ -347,16 +347,12 @@ llvm::omp::Clause OpenMPAtomicConstruct::GetKind() const { bool OpenMPAtomicConstruct::IsCapture() const { const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)}; - return llvm::any_of(dirSpec.Clauses().v, [](auto &clause) { - return clause.Id() == llvm::omp::Clause::OMPC_capture; - }); + return omp::FindClause(dirSpec, llvm::omp::Clause::OMPC_capture); } bool OpenMPAtomicConstruct::IsCompare() const { const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)}; - return llvm::any_of(dirSpec.Clauses().v, [](auto &clause) { - return clause.Id() == llvm::omp::Clause::OMPC_compare; - }); + return omp::FindClause(dirSpec, llvm::omp::Clause::OMPC_compare); } } // namespace Fortran::parser diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 0a41484399b5..732531b1bdfc 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -688,13 +688,7 @@ void AccStructureChecker::Enter(const parser::OpenACCCacheConstruct &x) { if (const auto *triplet = std::get_if<parser::SubscriptTriplet>( &subscript.u)) { - const auto &lower{std::get<0>(triplet->t)}; - const auto &upper{std::get<1>(triplet->t)}; const auto 
&stride{std::get<2>(triplet->t)}; - if (!lower && !upper) { - context_.Say(designator.source, - "The CACHE directive requires at least one of the bounds in the array section subscript triplet to be specified"_err_en_US); - } if (stride) { if (auto strideVal{GetIntValue(*stride)}) { if (*strideVal != 1) { diff --git a/flang/lib/Semantics/check-namelist.cpp b/flang/lib/Semantics/check-namelist.cpp index c2804c5d874e..eedc1a66b563 100644 --- a/flang/lib/Semantics/check-namelist.cpp +++ b/flang/lib/Semantics/check-namelist.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "check-namelist.h" +#include "flang/Semantics/tools.h" namespace Fortran::semantics { @@ -28,6 +29,13 @@ void NamelistChecker::Leave(const parser::NamelistStmt &nmlStmt) { "PUBLIC namelist"_err_en_US, nmlObjSymbol->name()); } + // `namelist-group-object` may only contain variables. + if (IsNamedConstant(*nmlObjSymbol)) { + context_.Warn(common::UsageWarning::NamelistParameter, + nmlObjName.source, + "A namelist group object '%s' should not be a PARAMETER"_port_en_US, + nmlObjSymbol->name()); + } } } } diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 0cad16dc3deb..f81bde981594 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -159,12 +159,11 @@ void OmpStructureChecker::HasInvalidLoopBinding( const parser::OmpDirectiveName &beginName{beginSpec.DirName()}; auto teamsBindingChecker = [&](parser::MessageFixedText msg) { - for (const auto &clause : beginSpec.Clauses().v) { - if (const auto *bindClause{ - std::get_if<parser::OmpClause::Bind>(&clause.u)}) { - if (bindClause->v.v != parser::OmpBindClause::Binding::Teams) { - context_.Say(beginName.source, msg); - } + if (auto *clause{ + parser::omp::FindClause(beginSpec, llvm::omp::Clause::OMPC_bind)}) { + auto &bind{std::get<parser::OmpClause::Bind>(clause->u).v}; + if (bind.v != 
parser::OmpBindClause::Binding::Teams) { + context_.Say(beginName.source, msg); } } }; @@ -204,11 +203,9 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { [&](const parser::OmpBlockConstruct &c) { const parser::OmpDirectiveSpecification &beginSpec{c.BeginDir()}; if (beginSpec.DirId() == llvm::omp::Directive::OMPD_ordered) { - for (const auto &clause : beginSpec.Clauses().v) { - if (std::get_if<parser::OmpClause::Simd>(&clause.u)) { - eligibleSIMD = true; - break; - } + if (parser::omp::FindClause( + beginSpec, llvm::omp::Clause::OMPC_simd)) { + eligibleSIMD = true; } } }, @@ -217,11 +214,9 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { &c.u)}) { llvm::omp::Directive dirId{ssc->v.DirId()}; if (dirId == llvm::omp::Directive::OMPD_ordered) { - for (const parser::OmpClause &x : ssc->v.Clauses().v) { - if (x.Id() == llvm::omp::Clause::OMPC_simd) { - eligibleSIMD = true; - break; - } + if (parser::omp::FindClause( + ssc->v, llvm::omp::Clause::OMPC_simd)) { + eligibleSIMD = true; } } else if (dirId == llvm::omp::Directive::OMPD_scan) { eligibleSIMD = true; @@ -274,9 +269,8 @@ static bool IsFullUnroll(const parser::OpenMPLoopConstruct &x) { const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()}; if (beginSpec.DirName().v == llvm::omp::Directive::OMPD_unroll) { - return llvm::none_of(beginSpec.Clauses().v, [](const parser::OmpClause &c) { - return c.Id() == llvm::omp::Clause::OMPC_partial; - }); + return parser::omp::FindClause( + beginSpec, llvm::omp::Clause::OMPC_partial) == nullptr; } return false; } @@ -312,15 +306,13 @@ static std::optional<size_t> CountGeneratedNests( if (!nestedCount || *nestedCount == 0) { return std::nullopt; } - auto rangeAt{ - llvm::find_if(beginSpec.Clauses().v, [](const parser::OmpClause &c) { - return c.Id() == llvm::omp::Clause::OMPC_looprange; - })}; - if (rangeAt == beginSpec.Clauses().v.end()) { + auto *clause{ + parser::omp::FindClause(beginSpec, 
llvm::omp::Clause::OMPC_looprange)}; + if (!clause) { return 1; } - auto *loopRange{parser::Unwrap<parser::OmpLooprangeClause>(*rangeAt)}; + auto *loopRange{parser::Unwrap<parser::OmpLooprangeClause>(*clause)}; std::optional<int64_t> count{GetIntValue(std::get<1>(loopRange->t))}; if (!count || *count <= 0) { return std::nullopt; @@ -617,23 +609,22 @@ void OmpStructureChecker::CheckDistLinear( void OmpStructureChecker::CheckLooprangeBounds( const parser::OpenMPLoopConstruct &x) { - for (const parser::OmpClause &clause : x.BeginDir().Clauses().v) { - if (auto *lrClause{parser::Unwrap<parser::OmpLooprangeClause>(clause)}) { - auto first{GetIntValue(std::get<0>(lrClause->t))}; - auto count{GetIntValue(std::get<1>(lrClause->t))}; - if (!first || !count || *first <= 0 || *count <= 0) { - return; - } - auto requiredCount{static_cast<size_t>(*first + *count - 1)}; - if (auto loopCount{CountGeneratedNests(std::get<parser::Block>(x.t))}) { - if (*loopCount < requiredCount) { - context_.Say(clause.source, - "The specified loop range requires %zu loops, but the loop sequence has a length of %zu"_err_en_US, - requiredCount, *loopCount); - } - } + if (auto *clause{parser::omp::FindClause( + x.BeginDir(), llvm::omp::Clause::OMPC_looprange)}) { + auto *lrClause{parser::Unwrap<parser::OmpLooprangeClause>(clause)}; + auto first{GetIntValue(std::get<0>(lrClause->t))}; + auto count{GetIntValue(std::get<1>(lrClause->t))}; + if (!first || !count || *first <= 0 || *count <= 0) { return; } + auto requiredCount{static_cast<size_t>(*first + *count - 1)}; + if (auto loopCount{CountGeneratedNests(std::get<parser::Block>(x.t))}) { + if (*loopCount < requiredCount) { + context_.Say(clause->source, + "The specified loop range requires %zu loops, but the loop sequence has a length of %zu"_err_en_US, + requiredCount, *loopCount); + } + } } } diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 7ffda12c1fb8..431c41f443f7 100644 --- 
a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1800,16 +1800,8 @@ void OmpStructureChecker::CheckIndividualAllocateDirective( return true; }}; - const auto *allocator{[&]() { - // Can't use FindClause in Enter (because clauses haven't been visited - // yet). - for (const parser::OmpClause &c : beginSpec.Clauses().v) { - if (c.Id() == llvm::omp::Clause::OMPC_allocator) { - return &c; - } - } - return static_cast<const parser::OmpClause *>(nullptr); - }()}; + const auto *allocator{ + parser::omp::FindClause(beginSpec, llvm::omp::Clause::OMPC_allocator)}; if (InTargetRegion()) { bool hasDynAllocators{ @@ -4674,11 +4666,9 @@ void OmpStructureChecker::CheckDoacross(const parser::OmpDoacross &doa) { const parser::OmpDirectiveSpecification &beginSpec{(*loopc)->BeginDir()}; llvm::omp::Directive loopDir{beginSpec.DirId()}; if (loopDir == llvm::omp::OMPD_do || loopDir == llvm::omp::OMPD_simd) { - auto IsOrdered{[](const parser::OmpClause &c) { - return c.Id() == llvm::omp::OMPC_ordered; - }}; // If it has ORDERED clause, stop the traversal. - if (llvm::any_of(beginSpec.Clauses().v, IsOrdered)) { + if (parser::omp::FindClause( + beginSpec, llvm::omp::Clause::OMPC_ordered)) { break; } } diff --git a/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir b/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir index 6ecccde39d3f..fa9f9c429fa0 100644 --- a/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir +++ b/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir @@ -253,3 +253,121 @@ func.func @test_accbounds_rematerialize_fir() { // CHECK: acc.bounds // CHECK: acc.serial { // CHECK: acc.bounds + +// ----- + +// Test fir.convert rematerialization (ViewLikeOpInterface + +// OutlineRematerializationOpInterface). 
+func.func private @use_i64(i64) -> () + +func.func @test_convert_rematerialize(%arg0: !fir.ref<i32>) { + %0 = fir.convert %arg0 : (!fir.ref<i32>) -> i64 + fir.call @use_i64(%0) : (i64) -> () + acc.parallel { + fir.call @use_i64(%0) : (i64) -> () + acc.yield + } + return +} + +// CHECK-LABEL: @test_convert_rematerialize +// CHECK: %[[CVT_OUTER:.*]] = fir.convert +// CHECK: fir.call @use_i64(%[[CVT_OUTER]]) +// CHECK: acc.parallel { +// CHECK: %[[CVT_INNER:.*]] = fir.convert +// CHECK: fir.call @use_i64(%[[CVT_INNER]]) + +// ----- + +// Test fir.convert sinking (only used inside region). +func.func private @use_i64(i64) -> () + +func.func @test_convert_sink(%arg0: !fir.ref<i32>) { + %0 = fir.convert %arg0 : (!fir.ref<i32>) -> i64 + acc.parallel { + fir.call @use_i64(%0) : (i64) -> () + acc.yield + } + return +} + +// CHECK-LABEL: @test_convert_sink +// CHECK: acc.parallel { +// CHECK: %[[CVT:.*]] = fir.convert +// CHECK: fir.call @use_i64(%[[CVT]]) + +// ----- + +// Test fir.convert sinking when input is fir.alloca (not a block argument). +func.func private @use_i64(i64) -> () + +func.func @test_convert_alloca_sink() { + %0 = fir.alloca i32 + %1 = fir.convert %0 : (!fir.ref<i32>) -> i64 + acc.parallel { + fir.call @use_i64(%1) : (i64) -> () + acc.yield + } + return +} + +// CHECK-LABEL: @test_convert_alloca_sink +// CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +// CHECK: acc.parallel { +// CHECK: %[[CVT:.*]] = fir.convert %[[ALLOCA]] +// CHECK: fir.call @use_i64(%[[CVT]]) + +// ----- + +// Test fir.convert rematerialization when input is fir.alloca. 
+func.func private @use_i64(i64) -> () + +func.func @test_convert_alloca_remat() { + %0 = fir.alloca i32 + %1 = fir.convert %0 : (!fir.ref<i32>) -> i64 + fir.call @use_i64(%1) : (i64) -> () + acc.parallel { + fir.call @use_i64(%1) : (i64) -> () + acc.yield + } + return +} + +// CHECK-LABEL: @test_convert_alloca_remat +// CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +// CHECK: %[[CVT_OUTER:.*]] = fir.convert %[[ALLOCA]] +// CHECK: fir.call @use_i64(%[[CVT_OUTER]]) +// CHECK: acc.parallel { +// CHECK: %[[CVT_INNER:.*]] = fir.convert %[[ALLOCA]] +// CHECK: fir.call @use_i64(%[[CVT_INNER]]) + +// ----- + +// Test that an intermediate fir.convert in a trace chain +// (declare -> convert -> unboxchar) does not get rematerialized, +// while a direct ptr-to-int fir.convert is correctly sunk. +func.func private @use_i64(i64) -> () +func.func private @use_ref(!fir.ref<!fir.char<1,10>>) -> () + +func.func @test_convert_chain_and_direct(%arg0: !fir.boxchar<1>, %arg1: !fir.ref<i32>) { + %c10 = arith.constant 10 : index + %0:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %1 = fir.convert %0#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,10>> + %2 = fir.declare %1 typeparams %c10 {uniq_name = "scalar"} : (!fir.ref<!fir.char<1,10>>, index) -> !fir.ref<!fir.char<1,10>> + %3 = fir.convert %arg1 : (!fir.ref<i32>) -> i64 + acc.parallel { + fir.call @use_ref(%2) : (!fir.ref<!fir.char<1,10>>) -> () + fir.call @use_i64(%3) : (i64) -> () + acc.yield + } + return +} + +// CHECK-LABEL: @test_convert_chain_and_direct +// CHECK: %[[UNBOX:.*]]:2 = fir.unboxchar %arg0 +// CHECK: %[[CVT_REF:.*]] = fir.convert %[[UNBOX]]#0 +// CHECK: %[[DECL:.*]] = fir.declare %[[CVT_REF]] +// CHECK: acc.parallel { +// CHECK: %[[CVT_INT:.*]] = fir.convert %arg1 +// CHECK: fir.call @use_ref(%[[DECL]]) +// CHECK: fir.call @use_i64(%[[CVT_INT]]) diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90 index 36874d3c21cd..d2da6ef62d7e 100644 
--- a/flang/test/Lower/OpenACC/acc-cache.f90 +++ b/flang/test/Lower/OpenACC/acc-cache.f90 @@ -767,3 +767,19 @@ subroutine test_cache_temp_in_designator(data, a) ! CHECK: hlfir.designate %[[DECL]]#0 ! CHECK: acc.yield end subroutine + + +subroutine full_array_cache() + integer :: k, j , kd, jd,y, x + real(8) :: tile(0:8,0:8) + + !$acc parallel loop gang collapse(2) + do k = 1, kd + do j = 1, jd + !$acc cache(tile(:,:)) + end do + end do +end subroutine + +! CHECK-LABEL: func.func @_QPfull_array_cache() +! CHECK: acc.cache var(%{{.*}}) bounds(%{{.*}}) diff --git a/flang/test/Semantics/OpenACC/acc-cache-validity.f90 b/flang/test/Semantics/OpenACC/acc-cache-validity.f90 index de66043de1ec..aaffd4ec30a9 100644 --- a/flang/test/Semantics/OpenACC/acc-cache-validity.f90 +++ b/flang/test/Semantics/OpenACC/acc-cache-validity.f90 @@ -38,11 +38,9 @@ program openacc_cache_validity !ERROR: Only array element or subarray are allowed in CACHE directive !$acc cache(/i/) - !ERROR: The CACHE directive requires at least one of the bounds in the array section subscript triplet to be specified - !$acc cache(a(:)) + !$acc cache(a(:)) ! ok - !ERROR: The CACHE directive requires at least one of the bounds in the array section subscript triplet to be specified - !$acc cache(aa(:,:)) + !$acc cache(aa(:,:)) ! ok !ERROR: The CACHE directive does not support strided array sections !$acc cache(a(1:10:2)) diff --git a/flang/test/Semantics/namelist02.f90 b/flang/test/Semantics/namelist02.f90 new file mode 100644 index 000000000000..efe1f0204abd --- /dev/null +++ b/flang/test/Semantics/namelist02.f90 @@ -0,0 +1,29 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic -Werror + +module m + implicit none + integer, parameter :: mc = 42 +end module + +! Local named constant +program p + use m + implicit none + integer, parameter :: k = 3 + !PORTABILITY: A namelist group object 'k' should not be a PARAMETER [-Wnamelist-parameter] + namelist /g/ k + ! 
USE-associated named constant + !PORTABILITY: A namelist group object 'mc' should not be a PARAMETER [-Wnamelist-parameter] + namelist /g2/ mc +end program + +! Host-associated named constant +subroutine host + implicit none + integer, parameter :: hc = 10 + contains + subroutine inner + !PORTABILITY: A namelist group object 'hc' should not be a PARAMETER [-Wnamelist-parameter] + namelist /g3/ hc + end subroutine +end subroutine diff --git a/libc/shared/math.h b/libc/shared/math.h index a7d735ffa174..ede0ebd5371a 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -124,6 +124,7 @@ #include "math/fdimf16.h" #include "math/fdiml.h" #include "math/ffma.h" +#include "math/ffmaf128.h" #include "math/ffmal.h" #include "math/floor.h" #include "math/floorbf16.h" diff --git a/libc/shared/math/ffmaf128.h b/libc/shared/math/ffmaf128.h new file mode 100644 index 000000000000..b22e6a1f08d3 --- /dev/null +++ b/libc/shared/math/ffmaf128.h @@ -0,0 +1,29 @@ +//===-- Shared ffmaf128 function --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_FFMAF128_H +#define LLVM_LIBC_SHARED_MATH_FFMAF128_H + +#include "include/llvm-libc-types/float128.h" + +#ifdef LIBC_TYPES_HAS_FLOAT128 + +#include "shared/libc_common.h" +#include "src/__support/math/ffmaf128.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::ffmaf128; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT128 + +#endif // LLVM_LIBC_SHARED_MATH_FFMAF128_H diff --git a/libc/src/__support/FPUtil/generic/add_sub.h b/libc/src/__support/FPUtil/generic/add_sub.h index 9f3ecff0eb23..4ef9ce06ebf9 100644 --- a/libc/src/__support/FPUtil/generic/add_sub.h +++ b/libc/src/__support/FPUtil/generic/add_sub.h @@ -96,6 +96,8 @@ add_or_sub(InType x, InType y) { if (x_bits.is_zero()) { if (y_bits.is_zero()) { + if (is_effectively_add) + return OutFPBits::zero(x_bits.sign()).get_val(); switch (quick_get_round()) { case FE_DOWNWARD: return OutFPBits::zero(Sign::NEG).get_val(); diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 79278b6e77a3..0df8262cfb5f 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -1164,6 +1164,17 @@ add_header_library( ) add_header_library( + ffmaf128 + HDRS + ffmaf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.fma + libc.src.__support.macros.config + libc.include.llvm-libc-types.float128 +) + +add_header_library( ffmal HDRS ffmal.h diff --git a/libc/src/__support/math/asinpif.h b/libc/src/__support/math/asinpif.h index 9a5daf6198a4..79d3ebbe63b5 100644 --- a/libc/src/__support/math/asinpif.h +++ b/libc/src/__support/math/asinpif.h @@ -23,22 +23,6 @@ namespace LIBC_NAMESPACE_DECL { namespace math { LIBC_INLINE float asinpif(float x) { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - constexpr 
size_t N_EXCEPTS = 5; - constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINPIF_EXCEPTS = { - {// (inputs, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.e768f6p-122, asinpif(x) = 0x1.364b7ap-123 (RZ) - {0x02F3B47B, 0x021B25BD, 1, 0, 0}, - // x = 0x1.e768f6p-24, asinpif(x) = 0x1.364b7ap-25 (RZ) - {0x33F3B47B, 0x331B25BD, 1, 0, 1}, - // x = 0x1.dddb4ep-19, asinpif(x) = 0x1.303686p-20 (RZ) - {0x366EEDA7, 0x35981B43, 1, 0, 1}, - // x = -0x1.dddb4ep-19, asinpif(x) = -0x1.303686p-20 (RZ) - {0xB66EEDA7, 0xB5981B43, 0, 1, 1}, - // x = -0x1.e768f6p-24, asinpif(x) = -0x1.364b7ap-25 (RZ) - {0xB3F3B47B, 0xB31B25BD, 0, 1, 1}}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - using FPBits = fputil::FPBits<float>; FPBits xbits(x); @@ -61,12 +45,6 @@ LIBC_INLINE float asinpif(float x) { return FPBits::quiet_nan().get_val(); } -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - auto r = ASINPIF_EXCEPTS.lookup(xbits.uintval()); - if (LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // if |x| <= 0.5: // asinpi(x) = x * (c0 + x^2 * P1(x^2)) if (LIBC_UNLIKELY(x_abs <= 0.5)) { diff --git a/libc/src/__support/math/ffmaf128.h b/libc/src/__support/math/ffmaf128.h new file mode 100644 index 000000000000..c4b5a58e145f --- /dev/null +++ b/libc/src/__support/math/ffmaf128.h @@ -0,0 +1,34 @@ +//===-- Implementation header for ffmaf128 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_FFMAF128_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_FFMAF128_H + +#include "include/llvm-libc-types/float128.h" + +#ifdef LIBC_TYPES_HAS_FLOAT128 + +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE float ffmaf128(float128 x, float128 y, float128 z) { + return fputil::fma<float>(x, y, z); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT128 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_FFMAF128_H diff --git a/libc/src/__support/math/inv_trigf_utils.h b/libc/src/__support/math/inv_trigf_utils.h index 7a93831333db..54a94f572cda 100644 --- a/libc/src/__support/math/inv_trigf_utils.h +++ b/libc/src/__support/math/inv_trigf_utils.h @@ -184,14 +184,14 @@ LIBC_INLINE double asin_eval(double xsq) { // > prec = 200; // > display = hexadecimal; // > g = asin(x) / (pi * x); -// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], +// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|], // > [|D...|], [0, 0.5]); // > for i from 0 to degree(P) do coeff(P, i); // > print("Error:", dirtyinfnorm(P - g, [1e-30; 0.25])); -// Error: 0x1.45c281e1cf9b58p-50 ~= 2^−49.652 +// Error : 0x1.a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778acp-54 ~= 2^-54 // // Non-zero coefficients (even powers only): -LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[13] = { +LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[12] = { 0x1.45f306dc9c881p-2, // x^0 0x1.b2995e7b7e756p-5, // x^2 0x1.8723a1d12f828p-6, // x^4 @@ -206,12 +206,40 @@ LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[13] = { 0x1.4b50c2eb13708p-7 // x^22 }; -// Evaluates P1(v2) = c1 + c2*v2 + c3*v2^2 + ... 
(tail of P without c0) +// Evaluates P1(v2) = c1 + c2*v2 + c3*v2^2 + ... + c11*v2^10 (tail of P +// without c0) using Estrin's scheme for instruction-level parallelism. +// +// The tail polynomial has 11 coefficients ASINPI_COEFFS[1..11] in powers of +// v2: +// P1(v2) = c1 + c2*v2 + c3*v2^2 + c4*v2^3 + ... + c11*v2^10 +// +// Estrin pairs them bottom-up: +// Level 0 (6 pairs, using v2): +// p0 = c1 + c2*v2 p1 = c3 + c4*v2 +// p2 = c5 + c6*v2 p3 = c7 + c8*v2 +// p4 = c9 + c10*v2 p5 = c11 +// Level 1 (3 pairs, using v4): +// q0 = p0 + p1*v4 q1 = p2 + p3*v4 +// q2 = p4 + p5*v4 +// Level 2 (using v8): +// r0 = q0 + q1*v8 r1 = q2 +// result = q0 + q1*v8 + q2*v16 LIBC_INLINE double asinpi_eval(double v2) { - return fputil::polyeval( - v2, ASINPI_COEFFS[1], ASINPI_COEFFS[2], ASINPI_COEFFS[3], - ASINPI_COEFFS[4], ASINPI_COEFFS[5], ASINPI_COEFFS[6], ASINPI_COEFFS[7], - ASINPI_COEFFS[8], ASINPI_COEFFS[9], ASINPI_COEFFS[10], ASINPI_COEFFS[11]); + double v4 = v2 * v2; + double v8 = v4 * v4; + + double p0 = fputil::multiply_add(v2, ASINPI_COEFFS[2], ASINPI_COEFFS[1]); + double p1 = fputil::multiply_add(v2, ASINPI_COEFFS[4], ASINPI_COEFFS[3]); + double p2 = fputil::multiply_add(v2, ASINPI_COEFFS[6], ASINPI_COEFFS[5]); + double p3 = fputil::multiply_add(v2, ASINPI_COEFFS[8], ASINPI_COEFFS[7]); + double p4 = fputil::multiply_add(v2, ASINPI_COEFFS[10], ASINPI_COEFFS[9]); + double p5 = ASINPI_COEFFS[11]; + + double q0 = fputil::multiply_add(v4, p1, p0); + double q1 = fputil::multiply_add(v4, p3, p2); + double q2 = fputil::multiply_add(v4, p5, p4); + + return fputil::polyeval(v8, q0, q1, q2); } } // namespace inv_trigf_utils_internal diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index f8ec25be61d1..418cf85b84a2 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3155,8 +3155,7 @@ add_entrypoint_object( HDRS ../ffmaf128.h DEPENDS - libc.src.__support.macros.properties.types - 
libc.src.__support.FPUtil.fma + libc.src.__support.math.ffmaf128 ) add_entrypoint_object( diff --git a/libc/src/math/generic/ffmaf128.cpp b/libc/src/math/generic/ffmaf128.cpp index 55da93020faf..15c0308b0b9c 100644 --- a/libc/src/math/generic/ffmaf128.cpp +++ b/libc/src/math/generic/ffmaf128.cpp @@ -7,14 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/ffmaf128.h" -#include "src/__support/FPUtil/FMA.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" +#include "src/__support/math/ffmaf128.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float, ffmaf128, (float128 x, float128 y, float128 z)) { - return fputil::fma<float>(x, y, z); + return math::ffmaf128(x, y, z); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index a04a15cdabcb..c90e5687d8c3 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -121,6 +121,7 @@ add_fp_unittest( libc.src.__support.math.fdimf16 libc.src.__support.math.fdiml libc.src.__support.math.ffma + libc.src.__support.math.ffmaf128 libc.src.__support.math.ffmal libc.src.__support.math.floor libc.src.__support.math.floorbf16 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index 460449e4fcb2..17045ce5edfd 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -339,6 +339,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat128) { EXPECT_FP_EQ(float128(0x0p+0), LIBC_NAMESPACE::shared::atan2f128(float128(0.0), float128(0.0))); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::ffmaf128( + float128(0.0), float128(0.0), float128(0.0))); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::fsqrtf128(float128(1.0f))); EXPECT_FP_EQ_ALL_ROUNDING(float128(0.75), LIBC_NAMESPACE::shared::frexpf128( float128(24), &exponent)); diff --git a/libc/test/src/math/smoke/AddTest.h 
b/libc/test/src/math/smoke/AddTest.h index d0a9dcb9c283..511ea581d523 100644 --- a/libc/test/src/math/smoke/AddTest.h +++ b/libc/test/src/math/smoke/AddTest.h @@ -165,6 +165,15 @@ public: EXPECT_FP_EQ(OutType(-1.0), func(InType(-2.0), InType(1.0))); EXPECT_FP_EQ(OutType(-3.0), func(InType(-2.0), InType(-1.0))); } + + void test_signed_zero_result(AddFunc func) { + EXPECT_FP_EQ_ALL_ROUNDING(zero, func(in.zero, in.zero)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, func(in.neg_zero, in.neg_zero)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero, + func(in.neg_zero, in.zero)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero, + func(in.zero, in.neg_zero)); + } }; #define LIST_ADD_TESTS(OutType, InType, func) \ @@ -176,7 +185,8 @@ public: TEST_F(LlvmLibcAddTest, RangeErrors) { test_range_errors(&func); } \ TEST_F(LlvmLibcAddTest, InexactResults) { test_inexact_results(&func); } \ TEST_F(LlvmLibcAddTest, MixedNormality) { test_mixed_normality(&func); } \ - TEST_F(LlvmLibcAddTest, MixedSigns) { test_mixed_signs(&func); } + TEST_F(LlvmLibcAddTest, MixedSigns) { test_mixed_signs(&func); } \ + TEST_F(LlvmLibcAddTest, SignedZeroResult) { test_signed_zero_result(&func); } #define LIST_ADD_SAME_TYPE_TESTS(suffix, OutType, InType, func) \ using LlvmLibcAddTest##suffix = AddTest<OutType, InType>; \ @@ -193,6 +203,9 @@ public: TEST_F(LlvmLibcAddTest##suffix, MixedNormality) { \ test_mixed_normality(&func); \ } \ - TEST_F(LlvmLibcAddTest##suffix, MixedSigns) { test_mixed_signs(&func); } + TEST_F(LlvmLibcAddTest##suffix, MixedSigns) { test_mixed_signs(&func); } \ + TEST_F(LlvmLibcAddTest##suffix, SignedZeroResult) { \ + test_signed_zero_result(&func); \ + } #endif // LLVM_LIBC_TEST_SRC_MATH_SMOKE_ADDTEST_H diff --git a/libc/test/src/math/smoke/SubTest.h b/libc/test/src/math/smoke/SubTest.h index 79086aa0bfb7..bc2af7ee08b0 100644 --- a/libc/test/src/math/smoke/SubTest.h +++ b/libc/test/src/math/smoke/SubTest.h @@ -156,6 +156,15 @@ public: EXPECT_FP_EQ(OutType(-3.0), 
func(InType(-2.0), InType(1.0))); EXPECT_FP_EQ(OutType(-1.0), func(InType(-2.0), InType(-1.0))); } + + void test_signed_zero_result(SubFunc func) { + EXPECT_FP_EQ_ALL_ROUNDING(zero, func(in.zero, in.neg_zero)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, func(in.neg_zero, in.zero)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero, + func(in.zero, in.zero)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero, + func(in.neg_zero, in.neg_zero)); + } }; #define LIST_SUB_TESTS(OutType, InType, func) \ @@ -166,7 +175,8 @@ public: } \ TEST_F(LlvmLibcSubTest, RangeErrors) { test_range_errors(&func); } \ TEST_F(LlvmLibcSubTest, InexactResults) { test_inexact_results(&func); } \ - TEST_F(LlvmLibcSubTest, MixedSigns) { test_mixed_signs(&func); } + TEST_F(LlvmLibcSubTest, MixedSigns) { test_mixed_signs(&func); } \ + TEST_F(LlvmLibcSubTest, SignedZeroResult) { test_signed_zero_result(&func); } #define LIST_SUB_SAME_TYPE_TESTS(suffix, OutType, InType, func) \ using LlvmLibcSubTest##suffix = SubTest<OutType, InType>; \ @@ -180,6 +190,9 @@ public: TEST_F(LlvmLibcSubTest##suffix, InexactResults) { \ test_inexact_results(&func); \ } \ - TEST_F(LlvmLibcSubTest##suffix, MixedSigns) { test_mixed_signs(&func); } + TEST_F(LlvmLibcSubTest##suffix, MixedSigns) { test_mixed_signs(&func); } \ + TEST_F(LlvmLibcSubTest##suffix, SignedZeroResult) { \ + test_signed_zero_result(&func); \ + } #endif // LLVM_LIBC_TEST_SRC_MATH_SMOKE_SUBTEST_H diff --git a/libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h b/libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h new file mode 100644 index 000000000000..3a1137ef3998 --- /dev/null +++ b/libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_WORKITEM_CLC_GET_ENQUEUED_LOCAL_SIZE_H__ +#define __CLC_WORKITEM_CLC_GET_ENQUEUED_LOCAL_SIZE_H__ + +#include <clc/internal/clc.h> + +_CLC_OVERLOAD _CLC_CONST _CLC_DECL size_t +__clc_get_enqueued_local_size(uint dim); + +#endif // __CLC_WORKITEM_CLC_GET_ENQUEUED_LOCAL_SIZE_H__ diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES index b4557b0a26f7..7006f538d927 100644 --- a/libclc/clc/lib/amdgcn/SOURCES +++ b/libclc/clc/lib/amdgcn/SOURCES @@ -2,6 +2,7 @@ address_space/qualifier.cl math/clc_ldexp.cl mem_fence/clc_mem_fence.cl synchronization/clc_work_group_barrier.cl +workitem/clc_get_enqueued_local_size.cl workitem/clc_get_global_offset.cl workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl new file mode 100644 index 000000000000..c7226241694b --- /dev/null +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_enqueued_local_size.h" +#include "clc/workitem/clc_get_local_size.h" + +_CLC_OVERLOAD _CLC_DEF size_t __clc_get_enqueued_local_size(uint dim) { + return __clc_get_local_size(dim); +} diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl index c10cdd2d02ef..f21a060849db 100644 --- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl +++ b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl @@ -6,11 +6,15 @@ // //===----------------------------------------------------------------------===// +#include <amdhsa_abi.h> #include <clc/opencl/opencl-base.h> _CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { - __constant uint *ptr = (__constant uint *)__builtin_amdgcn_dispatch_ptr(); - if (dim < 3) - return ptr[3 + dim]; - return 1; + if (dim > 2) + return 1; + __constant amdhsa_implicit_kernarg_v5 *args = + (__constant amdhsa_implicit_kernarg_v5 *) + __builtin_amdgcn_implicitarg_ptr(); + return args->block_count[dim] * (uint)args->group_size[dim] + + (uint)args->remainder[dim]; } diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl index a95c58ca1853..ed1e17776361 100644 --- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl +++ b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl @@ -6,17 +6,21 @@ // //===----------------------------------------------------------------------===// +#include <amdhsa_abi.h> #include <clc/opencl/opencl-base.h> _CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { - __constant uint *ptr = (__constant uint *)__builtin_amdgcn_dispatch_ptr(); - switch (dim) { - case 0: - return ptr[1] & 0xffffu; - case 1: - return ptr[1] >> 16; - case 2: - return ptr[2] & 0xffffu; - } 
- return 1; + if (dim > 2) + return 1; + + __constant amdhsa_implicit_kernarg_v5 *args = + (__constant amdhsa_implicit_kernarg_v5 *) + __builtin_amdgcn_implicitarg_ptr(); + + uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(), + __builtin_amdgcn_workgroup_id_y(), + __builtin_amdgcn_workgroup_id_z()}; + + return group_ids[dim] < args->block_count[dim] ? (size_t)args->group_size[dim] + : (size_t)args->remainder[dim]; } diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES index 0522e13f5d3d..84fc4a6650c3 100644 --- a/libclc/opencl/lib/amdgcn/SOURCES +++ b/libclc/opencl/lib/amdgcn/SOURCES @@ -1,5 +1,4 @@ mem_fence/fence.cl -synchronization/barrier.cl workitem/get_global_offset.cl workitem/get_group_id.cl workitem/get_global_size.cl diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl deleted file mode 100644 index 9f67b6ebcb6d..000000000000 --- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl +++ /dev/null @@ -1,17 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/opencl/synchronization/utils.h> -#include <clc/synchronization/clc_work_group_barrier.h> - -_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { - int memory_scope = __opencl_get_memory_scope(flags); - int memory_order = __ATOMIC_SEQ_CST; - __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); - __clc_work_group_barrier(memory_scope, memory_order, memory_semantics); -} diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES index bb5e8ab08a71..be94a34e9af0 100644 --- a/libclc/opencl/lib/generic/SOURCES +++ b/libclc/opencl/lib/generic/SOURCES @@ -199,5 +199,7 @@ shared/max.cl shared/min.cl shared/vload.cl shared/vstore.cl +synchronization/work_group_barrier.cl +workitem/get_enqueued_local_size.cl workitem/get_global_id.cl workitem/get_global_size.cl diff --git a/libclc/opencl/lib/generic/async/wait_group_events.cl b/libclc/opencl/lib/generic/async/wait_group_events.cl index 0881a74bd904..76a9ee38bb89 100644 --- a/libclc/opencl/lib/generic/async/wait_group_events.cl +++ b/libclc/opencl/lib/generic/async/wait_group_events.cl @@ -12,5 +12,5 @@ _CLC_DEF _CLC_OVERLOAD void wait_group_events(int num_events, event_t *event_list) { (void)num_events; (void)event_list; - barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); } diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl new file mode 100644 index 000000000000..14de313c4f58 --- /dev/null +++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/opencl/synchronization/utils.h" +#include "clc/opencl/utils.h" +#include "clc/synchronization/clc_work_group_barrier.h" + +_CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags, + memory_scope scope) { + int memory_order = __ATOMIC_SEQ_CST; + __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); + __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order, + memory_semantics); +} + +_CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags) { + work_group_barrier(flags, memory_scope_work_group); +} + +_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { + work_group_barrier(flags); +} diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl b/libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl new file mode 100644 index 000000000000..416a3e9837cd --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/opencl/opencl-base.h> +#include <clc/workitem/clc_get_enqueued_local_size.h> + +_CLC_DEF _CLC_OVERLOAD size_t get_enqueued_local_size(uint dim) { + return __clc_get_enqueued_local_size(dim); +} diff --git a/libclc/opencl/lib/ptx-nvidiacl/SOURCES b/libclc/opencl/lib/ptx-nvidiacl/SOURCES index eb28570a617a..eb64360fece7 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/SOURCES +++ b/libclc/opencl/lib/ptx-nvidiacl/SOURCES @@ -1,5 +1,4 @@ mem_fence/fence.cl -synchronization/barrier.cl workitem/get_global_id.cl workitem/get_group_id.cl workitem/get_local_id.cl diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl deleted file mode 100644 index 9f67b6ebcb6d..000000000000 --- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl +++ /dev/null @@ -1,17 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/opencl/synchronization/utils.h> -#include <clc/synchronization/clc_work_group_barrier.h> - -_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { - int memory_scope = __opencl_get_memory_scope(flags); - int memory_order = __ATOMIC_SEQ_CST; - __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); - __clc_work_group_barrier(memory_scope, memory_order, memory_semantics); -} diff --git a/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp index 19342168a72f..4ed74027ed8f 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp @@ -14,9 +14,6 @@ // Stress test for constexpr std::string initialization. // This test ensures that we can handle a large number of constexpr strings. -// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=1000000 -// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=1000000 - #include <string> #include <array> #include <cassert> diff --git a/lldb/docs/use/variable.rst b/lldb/docs/use/variable.rst index 73df62dcd1b7..6a258dfcd342 100644 --- a/lldb/docs/use/variable.rst +++ b/lldb/docs/use/variable.rst @@ -958,7 +958,8 @@ be implemented by the Python class): def get_child_index(self, name: str) -> int: """ This call should return the index of the synthetic child whose name is - given as the argument. + given as the argument. Array subscripting, names in the form "[N]", is + automatically supported. Return -1 if there is no child at the index. 
""" diff --git a/lldb/include/lldb/DataFormatters/TypeSynthetic.h b/lldb/include/lldb/DataFormatters/TypeSynthetic.h index fbf1d060a92b..d1e6efb79303 100644 --- a/lldb/include/lldb/DataFormatters/TypeSynthetic.h +++ b/lldb/include/lldb/DataFormatters/TypeSynthetic.h @@ -47,7 +47,14 @@ public: virtual lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) = 0; - virtual llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) = 0; + /// Determine the index of a named child. Subscript names ("[N]") are, by + /// default, handled automatically. For data types which need custom + /// subscripting behavior - for example a sparse array, disable automatic + /// subscripting with TypeOptions::eTypeOptionCustomSubscripting. + virtual llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) { + return llvm::createStringError("Type has no child named '%s'", + name.AsCString()); + } /// This function is assumed to always succeed and if it fails, the front-end /// should know to deal with it in the correct way (most probably, by refusing @@ -223,6 +230,18 @@ public: return *this; } + bool GetCustomSubscripting() const { + return m_flags & lldb::eTypeOptionCustomSubscripting; + } + + Flags &SetCustomSubscripting(bool value = true) { + if (value) + m_flags |= lldb::eTypeOptionCustomSubscripting; + else + m_flags &= ~lldb::eTypeOptionCustomSubscripting; + return *this; + } + uint32_t GetValue() { return m_flags; } void SetValue(uint32_t value) { m_flags = value; } @@ -245,6 +264,8 @@ public: bool WantsDereference() const { return m_flags.GetFrontEndWantsDereference();} + bool CustomSubscripting() const { return m_flags.GetCustomSubscripting(); } + void SetCascades(bool value) { m_flags.SetCascades(value); } void SetSkipsPointers(bool value) { m_flags.SetSkipPointers(value); } diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 7ebcb2214e0e..d2600d0a6ce4 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ 
b/lldb/include/lldb/lldb-enumerations.h @@ -926,7 +926,8 @@ FLAGS_ENUM(TypeOptions){eTypeOptionNone = (0u), eTypeOptionHideNames = (1u << 6), eTypeOptionNonCacheable = (1u << 7), eTypeOptionHideEmptyAggregates = (1u << 8), - eTypeOptionFrontEndWantsDereference = (1u << 9)}; + eTypeOptionFrontEndWantsDereference = (1u << 9), + eTypeOptionCustomSubscripting = (1u << 10)}; /// This is the return value for frame comparisons. If you are comparing frame /// A to frame B the following cases arise: diff --git a/lldb/source/DataFormatters/VectorType.cpp b/lldb/source/DataFormatters/VectorType.cpp index c2355fbfdcb2..624f9de312bb 100644 --- a/lldb/source/DataFormatters/VectorType.cpp +++ b/lldb/source/DataFormatters/VectorType.cpp @@ -271,19 +271,6 @@ public: return lldb::ChildCacheState::eRefetch; } - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; - } - private: lldb::Format m_parent_format = eFormatInvalid; lldb::Format m_item_format = eFormatInvalid; diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp index f89b0fe6b6db..4e9b1389a11a 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp @@ -1561,7 +1561,8 @@ void DynamicLoaderDarwinKernel::PrivateInitialize(Process *process) { } void DynamicLoaderDarwinKernel::SetNotificationBreakpointIfNeeded() { - if (m_break_id == LLDB_INVALID_BREAK_ID && m_kernel.GetModule()) { + if (m_break_id == 
LLDB_INVALID_BREAK_ID && m_kernel.GetModule() && + m_process->IsLiveDebugSession()) { DEBUG_PRINTF("DynamicLoaderDarwinKernel::%s() process state = %s\n", __FUNCTION__, StateAsCString(m_process->GetState())); diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp index f2521ec75087..1d522f0b4a72 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp @@ -28,15 +28,6 @@ public: GenericBitsetFrontEnd(ValueObject &valobj, StdLib stdlib); - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; - } - lldb::ChildCacheState Update() override; llvm::Expected<uint32_t> CalculateNumChildren() override { return m_elements.size(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp index b6ff4477a890..841d3b2220df 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp @@ -124,14 +124,6 @@ private: template <StlType Stl> class AbstractListFrontEnd : public SyntheticChildrenFrontEnd { public: - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; - } lldb::ChildCacheState Update() override; protected: diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp index 7fc6eb55d4e3..e4c261a5411f 100644 --- 
a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp @@ -41,12 +41,8 @@ public: llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { if (name == "$$dereference$$") return 0; - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; + return llvm::createStringError("Type has no child named '%s'", + name.AsCString()); } llvm::Expected<uint32_t> CalculateNumChildren() override { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index 9061be2e4014..c40223f5845a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp @@ -197,8 +197,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: llvm::Expected<uint32_t> CalculateNumChildrenForOldCompressedPairLayout(ValueObject &pair); @@ -390,16 +388,6 @@ lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -llvm::Expected<size_t> lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: - GetIndexOfChildWithName(ConstString name) { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; -} - SyntheticChildrenFrontEnd * lldb_private::formatters::LibcxxStdMapSyntheticFrontEndCreator( CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp index ebc6d92aabe0..3e4093509b6b 100644 --- 
a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp @@ -20,15 +20,6 @@ public: Update(); } - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; - } - lldb::ChildCacheState Update() override; llvm::Expected<uint32_t> CalculateNumChildren() override { return m_elements.size(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index fd8411ba0e56..34eed108d850 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -40,8 +40,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: CompilerType GetNodeType(); CompilerType GetElementType(CompilerType table_type); @@ -285,17 +283,6 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -llvm::Expected<size_t> -lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: - GetIndexOfChildWithName(ConstString name) { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; -} - SyntheticChildrenFrontEnd * lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEndCreator( CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp index 30fec4e2dde0..2855fc1e0512 100644 --- 
a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp @@ -202,15 +202,6 @@ public: Update(); } - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; - } - lldb::ChildCacheState Update() override; llvm::Expected<uint32_t> CalculateNumChildren() override { return m_size; } ValueObjectSP GetChildAtIndex(uint32_t idx) override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp index 076bbbb87448..7ef12f06d96d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp @@ -32,8 +32,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: // The lifetime of a ValueObject and all its derivative ValueObjects // (children, clones, etc.) is managed by a ClusterManager. 
These @@ -98,16 +96,6 @@ LibStdcppTupleSyntheticFrontEnd::CalculateNumChildren() { return m_members.size(); } -llvm::Expected<size_t> -LibStdcppTupleSyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; -} - SyntheticChildrenFrontEnd * lldb_private::formatters::LibStdcppTupleSyntheticFrontEndCreator( CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp index 566f92c39b1d..3cd88eebc56b 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp @@ -187,8 +187,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: /// Returns the ValueObject for the _Tree_node at index \ref idx. 
/// @@ -335,17 +333,6 @@ lldb_private::formatters::MsvcStlTreeSyntheticFrontEnd::Update() { return lldb::ChildCacheState::eRefetch; } -llvm::Expected<size_t> -lldb_private::formatters::MsvcStlTreeSyntheticFrontEnd::GetIndexOfChildWithName( - ConstString name) { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; -} - lldb::ChildCacheState MsvcStlTreeIterSyntheticFrontEnd::Update() { m_inner_sp = nullptr; ValueObjectSP node_sp = m_backend.GetChildMemberWithName("_Ptr"); diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp index fe20b4c141a6..fd133550e00b 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp @@ -20,15 +20,6 @@ public: Update(); } - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - return *optional_idx; - } - lldb::ChildCacheState Update() override; llvm::Expected<uint32_t> CalculateNumChildren() override { return m_elements.size(); diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp index 55e964256264..de6216d8a431 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp @@ -147,15 +147,6 @@ public: Update(); } - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = formatters::ExtractIndexFromString(name.GetCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - 
return *optional_idx; - } - lldb::ChildCacheState Update() override; llvm::Expected<uint32_t> CalculateNumChildren() override { return m_size; } ValueObjectSP GetChildAtIndex(uint32_t idx) override; diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp index 25376e064879..b1dc9ff7e48b 100644 --- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp @@ -56,8 +56,6 @@ public: lldb::ChildCacheState Update() override = 0; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - protected: virtual lldb::addr_t GetDataAddress() = 0; @@ -218,8 +216,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: ExecutionContextRef m_exe_ctx_ref; uint8_t m_ptr_size = 8; @@ -526,20 +522,6 @@ lldb_private::formatters::GenericNSArrayMSyntheticFrontEnd<D32, D64>::Update() { : lldb::ChildCacheState::eRefetch; } -llvm::Expected<size_t> lldb_private::formatters::NSArrayMSyntheticFrontEndBase:: - GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - template <typename D32, typename D64> lldb_private::formatters::GenericNSArrayMSyntheticFrontEnd<D32, D64>:: GenericNSArrayMSyntheticFrontEnd::~GenericNSArrayMSyntheticFrontEnd() { @@ -616,22 +598,6 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd<D32, D64, Inline>:: } template <typename D32, typename D64, bool Inline> -llvm::Expected<size_t> -lldb_private::formatters::GenericNSArrayISyntheticFrontEnd< - D32, D64, Inline>::GetIndexOfChildWithName(ConstString name) { - auto 
optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - -template <typename D32, typename D64, bool Inline> llvm::Expected<uint32_t> lldb_private::formatters::GenericNSArrayISyntheticFrontEnd< D32, D64, Inline>::CalculateNumChildren() { diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index 4ff8f36adff8..2d72e913192a 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -109,8 +109,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct DataDescriptor_32 { uint32_t _used : 26; @@ -148,8 +146,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: ExecutionContextRef m_exe_ctx_ref; CompilerType m_pair_type; @@ -178,8 +174,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct DictionaryItemDescriptor { lldb::addr_t key_ptr; @@ -228,8 +222,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct DictionaryItemDescriptor { lldb::addr_t key_ptr; @@ -259,8 +251,6 @@ namespace Foundation1100 { lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct DataDescriptor_32 { uint32_t _used : 26; @@ -585,20 +575,6 @@ lldb_private::formatters::NSDictionaryISyntheticFrontEnd:: m_data_64 = nullptr; } 
-llvm::Expected<size_t> lldb_private::formatters:: - NSDictionaryISyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - llvm::Expected<uint32_t> lldb_private::formatters:: NSDictionaryISyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) @@ -723,20 +699,6 @@ lldb_private::formatters::NSCFDictionarySyntheticFrontEnd:: : SyntheticChildrenFrontEnd(*valobj_sp), m_exe_ctx_ref(), m_hashtable(), m_pair_type() {} -llvm::Expected<size_t> lldb_private::formatters:: - NSCFDictionarySyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - llvm::Expected<uint32_t> lldb_private::formatters:: NSCFDictionarySyntheticFrontEnd::CalculateNumChildren() { if (!m_hashtable.IsValid()) @@ -859,21 +821,6 @@ lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: NSConstantDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) : SyntheticChildrenFrontEnd(*valobj_sp) {} -llvm::Expected<size_t> -lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd:: - GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= 
CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - llvm::Expected<uint32_t> lldb_private::formatters:: NSConstantDictionarySyntheticFrontEnd::CalculateNumChildren() { return m_size; @@ -1064,22 +1011,6 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< } template <typename D32, typename D64> -llvm::Expected<size_t> -lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< - D32, D64>::GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - -template <typename D32, typename D64> llvm::Expected<uint32_t> lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd< D32, D64>::CalculateNumChildren() { @@ -1227,20 +1158,6 @@ lldb_private::formatters::Foundation1100:: m_data_64 = nullptr; } -llvm::Expected<size_t> lldb_private::formatters::Foundation1100:: - NSDictionaryMSyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - llvm::Expected<uint32_t> lldb_private::formatters::Foundation1100:: NSDictionaryMSyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) diff --git a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp index b5360195e91d..23f711931f95 100644 --- 
a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp @@ -126,19 +126,6 @@ public: bool MightHaveChildren() override { return m_impl.m_mode != Mode::Invalid; } - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; - } - lldb::ValueObjectSP GetSyntheticValue() override { return nullptr; } protected: diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp index 150b23350712..44af668759f9 100644 --- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp @@ -52,8 +52,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct DataDescriptor_32 { uint32_t _used : 26; @@ -88,8 +86,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct SetItemDescriptor { lldb::addr_t item_ptr; @@ -119,8 +115,6 @@ public: lldb::ChildCacheState Update() override; - llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override; - private: struct SetItemDescriptor { @@ -386,21 +380,6 @@ lldb_private::formatters::NSSetISyntheticFrontEnd::~NSSetISyntheticFrontEnd() { m_data_64 = nullptr; } -llvm::Expected<size_t> -lldb_private::formatters::NSSetISyntheticFrontEnd::GetIndexOfChildWithName( - ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - 
uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - llvm::Expected<uint32_t> lldb_private::formatters::NSSetISyntheticFrontEnd::CalculateNumChildren() { if (!m_data_32 && !m_data_64) @@ -522,21 +501,6 @@ lldb_private::formatters::NSCFSetSyntheticFrontEnd::NSCFSetSyntheticFrontEnd( : SyntheticChildrenFrontEnd(*valobj_sp), m_exe_ctx_ref(), m_hashtable(), m_pair_type() {} -llvm::Expected<size_t> -lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetIndexOfChildWithName( - ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - llvm::Expected<uint32_t> lldb_private::formatters::NSCFSetSyntheticFrontEnd::CalculateNumChildren() { if (!m_hashtable.IsValid()) @@ -662,21 +626,6 @@ lldb_private::formatters::GenericNSSetMSyntheticFrontEnd<D32, D64>:: } template <typename D32, typename D64> -llvm::Expected<size_t> lldb_private::formatters::GenericNSSetMSyntheticFrontEnd< - D32, D64>::GetIndexOfChildWithName(ConstString name) { - auto optional_idx = ExtractIndexFromString(name.AsCString()); - if (!optional_idx) { - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - } - uint32_t idx = *optional_idx; - if (idx >= CalculateNumChildrenIgnoringErrors()) - return llvm::createStringError("Type has no child named '%s'", - name.AsCString()); - return idx; -} - -template <typename D32, typename D64> llvm::Expected<uint32_t> lldb_private::formatters::GenericNSSetMSyntheticFrontEnd< D32, D64>::CalculateNumChildren() { diff --git 
a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h index 51a53c749dfe..cd519691eb5a 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h @@ -33,12 +33,11 @@ namespace process_freebsd { class NativeProcessFreeBSD; -class NativeRegisterContextFreeBSD_x86_64 - : public NativeRegisterContextFreeBSD, - public NativeRegisterContextDBReg_x86 { +class NativeRegisterContextFreeBSD_x86 : public NativeRegisterContextFreeBSD, + public NativeRegisterContextDBReg_x86 { public: - NativeRegisterContextFreeBSD_x86_64(const ArchSpec &target_arch, - NativeThreadFreeBSD &native_thread); + NativeRegisterContextFreeBSD_x86(const ArchSpec &target_arch, + NativeThreadFreeBSD &native_thread); uint32_t GetRegisterSetCount() const override; const RegisterSet *GetRegisterSet(uint32_t set_index) const override; diff --git a/lldb/source/ValueObject/ValueObjectSynthetic.cpp b/lldb/source/ValueObject/ValueObjectSynthetic.cpp index 44e53bd5fd82..b0e67df5541e 100644 --- a/lldb/source/ValueObject/ValueObjectSynthetic.cpp +++ b/lldb/source/ValueObject/ValueObjectSynthetic.cpp @@ -9,6 +9,7 @@ #include "lldb/ValueObject/ValueObjectSynthetic.h" #include "lldb/Core/Value.h" +#include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/DataFormatters/TypeSynthetic.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/ConstString.h" @@ -18,6 +19,7 @@ #include "lldb/ValueObject/ValueObject.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Error.h" #include <optional> namespace lldb_private { @@ -344,12 +346,30 @@ ValueObjectSynthetic::GetIndexOfChildWithName(llvm::StringRef name_ref) { } if (!found_index && m_synth_filter_up != nullptr) { - auto index_or_err = m_synth_filter_up->GetIndexOfChildWithName(name); - if (!index_or_err) - return index_or_err.takeError(); + size_t index = 
SIZE_MAX; + if (auto index_or_err = m_synth_filter_up->GetIndexOfChildWithName(name)) { + index = *index_or_err; + } else if (!m_synth_sp->CustomSubscripting()) { + // Provide automatic support for subscript child names ("[N]"). + auto maybe_index = formatters::ExtractIndexFromString(name.GetCString()); + if (!maybe_index) + // The child name was not of the form "[N]", return the original error. + return index_or_err.takeError(); + + // Subscripting succeeded, ignore the original error. + llvm::consumeError(index_or_err.takeError()); + index = *maybe_index; + + // Prevent unnecessary work by limiting max to one past the index. + uint32_t max = index + 1; + auto num_children = GetNumChildrenIgnoringErrors(max); + if (index >= num_children) + return llvm::createStringError("Subscript index out of range: %zu", + index); + } std::lock_guard<std::mutex> guard(m_child_mutex); - m_name_toindex[name.GetCString()] = *index_or_err; - return *index_or_err; + m_name_toindex[name.GetCString()] = index; + return index; } else if (!found_index && m_synth_filter_up == nullptr) { return llvm::createStringError("Type has no child named '%s'", name.AsCString()); diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py index 167899bd3907..7549128d9b64 100644 --- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py @@ -25,7 +25,7 @@ class myArraySynthProvider: if name == "[0]": return 0 if name == "[1]": - return + return 1 if name == "[2]": return 2 if name == "[3]": diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile new file mode 100644 index 000000000000..c9319d6e6888 --- /dev/null +++ 
b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile @@ -0,0 +1,2 @@ +C_SOURCES := main.c +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py new file mode 100644 index 000000000000..be34b0f1f72d --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py @@ -0,0 +1,23 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + def test(self): + self.build() + _, process, _, _ = lldbutil.run_to_source_breakpoint( + self, "break here", lldb.SBFileSpec("main.c") + ) + self.runCmd("command script import thing_formatter.py") + frame = process.selected_thread.selected_frame + x = frame.var("x") + names = ("zero", "one") + for i in range(x.num_children): + idx = x.GetIndexOfChildWithName(f"[{i}]") + self.assertEqual(idx, i) + child = x.GetChildAtIndex(idx) + self.assertEqual(child.name, names[idx]) + idx = x.GetIndexOfChildWithName(f"[{x.num_children + 1}]") + self.assertEqual(idx, lldb.UINT32_MAX) diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c new file mode 100644 index 000000000000..ca0da120a7c0 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c @@ -0,0 +1,12 @@ +struct Thing { + int zero; + int one; +}; + +int main() { + struct Thing x; + x.zero = 1; + x.one = 2; + __builtin_printf("break here\n"); + return 0; +} diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py new file mode 100644 index 000000000000..0027f0ba0be6 --- /dev/null +++ 
b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py @@ -0,0 +1,15 @@ +class ThingSynthetic: + def __init__(self, valobj, _) -> None: + self.valobj = valobj + + def num_children(self): + return self.valobj.num_children + + def get_child_at_index(self, idx): + return self.valobj.GetChildAtIndex(idx) + + # Use default implementation of get_child_index. + + +def __lldb_init_module(dbg, _): + dbg.HandleCommand(f"type synthetic add -l {__name__}.ThingSynthetic Thing") diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py b/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py index cc8778bd51c7..3740ce8c451f 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py +++ b/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py @@ -9,6 +9,8 @@ import lldb def __lldb_init_module(debugger, internal_dict): + debugger.HandleCommand("type category define --enabled llvm") + debugger.HandleCommand( "type synthetic add -w llvm " f"-l {__name__}.MyOptionalSynthProvider " @@ -55,8 +57,8 @@ def stringify(bytecode: bytearray) -> str: return s -def evaluate(assembler: str, data: list): - bytecode = compile(assembler) +def evaluate(assembly: str, data: list): + bytecode = assemble(assembly) trace = True if trace: print( diff --git a/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test b/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test index 746f0197a1b5..863964477287 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test @@ -3,10 +3,10 @@ # RUN: %clang_host -std=c++17 -g %S/Inputs/FormatterBytecode/MyOptional.cpp -o %t.exe # RUN: %lldb %t.exe -o "command script import %S/../../../../examples/python/formatter_bytecode.py" -o "command script import %S/Inputs/FormatterBytecode/formatter.py" -o "b -p here" -o "r" -o "v x" -o "v y" -o q | FileCheck %s 
--check-prefix=OPTIONAL # OPTIONAL: (lldb) v x -# OPTIONAL: (MyOptional<int>) x = { +# OPTIONAL: (MyOptional<int>) x = None { # OPTIONAL: hasVal = false # OPTIONAL: } # OPTIONAL: (lldb) v y -# OPTIONAL: (MyOptional<int>) y = { +# OPTIONAL: (MyOptional<int>) y = (int) value = 42 { # OPTIONAL: Storage = (value = 42, hasVal = true) # OPTIONAL: } diff --git a/llvm/docs/AIToolPolicy.md b/llvm/docs/AIToolPolicy.md index 2a3ff9345db7..c7576327b326 100644 --- a/llvm/docs/AIToolPolicy.md +++ b/llvm/docs/AIToolPolicy.md @@ -143,6 +143,18 @@ contributors are responsible for ensuring that such material does not appear in their contributions. Contributions found to violate this policy will be removed just like any other offending contribution. +## Exceptions + +We have one exception to this policy for the Bazel-fixer bot. The project +council approved [this RFC][bazel-rfc] proposing to use a combination of +[dwyu][dwyu] and LLMs to maintain the Bazel build files. + +[bazel-rfc]: https://discourse.llvm.org/t/rfc-ai-assisted-bazel-fixer-bot/89178/93 +[dwyu]: https://github.com/hzeller/bant?tab=readme-ov-file#dwyu--depend-on-what-you-use + +Any future exception will be considered individually on its own merits as to +whether it is useful to the project or extracts work from maintainers. 
+ ## Examples Here are some examples of contributions that demonstrate how to apply diff --git a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h index 88371b557fe7..6c506d7eb5b7 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h @@ -88,6 +88,15 @@ public: using ElementSet = DenseSet<ElementId>; using ContainerElementsMap = DenseMap<ContainerId, ElementSet>; + class SuperNode; + +private: + using ElemToSuperNodeMap = + DenseMap<ContainerId, DenseMap<ElementId, SuperNode *>>; + + using SuperNodeDepsMap = DenseMap<SuperNode *, DenseSet<SuperNode *>>; + +public: class SuperNode { friend class WaitingOnGraph; friend class WaitingOnGraphTest; @@ -103,14 +112,112 @@ public: private: ContainerElementsMap Defs; ContainerElementsMap Deps; - }; -private: - using ElemToSuperNodeMap = - DenseMap<ContainerId, DenseMap<ElementId, SuperNode *>>; + ElemToSuperNodeMap *RegisteredElemToSN = nullptr; - using SuperNodeDepsMap = DenseMap<SuperNode *, DenseSet<SuperNode *>>; + /// Add a mapping from the Defs in this SuperNode to SN (which may or may + /// not be the same as this). + void mapDefsTo(ElemToSuperNodeMap &ElemToSN, SuperNode *SN, + bool AbandonOldMapping = false) { + assert(!Defs.empty() && "Empty defs!?"); + for (auto &[Container, Elements] : Defs) { + assert(!Elements.empty() && "Empty elements for container?"); + auto &ContainerElemToSN = ElemToSN[Container]; + for (auto &Elem : Elements) + ContainerElemToSN[Elem] = SN; + } + assert((AbandonOldMapping || !SN->RegisteredElemToSN || + SN->RegisteredElemToSN == &ElemToSN) && + "SN defs split across maps"); + SN->RegisteredElemToSN = &ElemToSN; + } + + /// Add a mapping from the Defs in this SuperNode to this. 
+ /// (Equivalent to `SN.mapDefsTo(ElemToSN, &SN);`) + void mapDefsToThis(ElemToSuperNodeMap &ElemToSN, + bool AbandonOldMapping = false) { + mapDefsTo(ElemToSN, this, AbandonOldMapping); + } + + /// Remove a mapping from the Defs in this SuperNode from the registered + /// ElemToSuperNodeMap. The mapping must already exist. + void unmapDefsFromThis() { + assert(RegisteredElemToSN && "No registered ElemToSuperNodeMap"); + for (auto &[Container, Elements] : Defs) { + auto I = RegisteredElemToSN->find(Container); + assert(I != RegisteredElemToSN->end() && "Container not in map"); + auto &ContainerElemToSN = I->second; + for (auto &Elem : Elements) { + assert(ContainerElemToSN[Elem] == this && "Mapping not present"); + ContainerElemToSN.erase(Elem); + } + if (ContainerElemToSN.empty()) + RegisteredElemToSN->erase(I); + } + RegisteredElemToSN = nullptr; + } + + /// For all Defs of this node that are defined by some node in ElemToSN, + /// remove the Def from this map and add this SuperNode to the list of + /// dependants of the defining node. + /// + /// Returns true if SuperNodeDeps was changed. + bool hoistDeps(SuperNodeDepsMap &SuperNodeDeps, + ElemToSuperNodeMap &ElemToSN) { + bool Changed = false; + + SmallVector<ContainerId> ContainersToRemove; + for (auto &[DepContainer, DepElems] : Deps) { + auto I = ElemToSN.find(DepContainer); + if (I == ElemToSN.end()) + continue; + auto &ContainerElemToSN = I->second; + + // ElemToSN includes SuperNodes that define elements in DepContainer. + // We need to iterate over ContainerElemToSN or DepElems: we pick the + // smaller to minimize the cost. 
+ if (ContainerElemToSN.size() < DepElems.size()) { + for (auto &[DefElem, DefSN] : ContainerElemToSN) { + if (DepElems.erase(DefElem) && DefSN != this) { + Changed = true; + SuperNodeDeps[DefSN].insert(this); + } + } + } else { + SmallVector<ElementId> ElemsToRemove; + for (auto &DepElem : DepElems) { + auto J = ContainerElemToSN.find(DepElem); + if (J == ContainerElemToSN.end()) + continue; + ElemsToRemove.push_back(DepElem); + SuperNode *DefSN = J->second; + if (DefSN != this) { + Changed = true; + SuperNodeDeps[DefSN].insert(this); + } + } + + for (auto &DepElem : ElemsToRemove) + DepElems.erase(DepElem); + } + + // If DepElems has become empty then add DepContainer to the list of + // containers to remove. + if (DepElems.empty()) + ContainersToRemove.push_back(DepContainer); + } + + for (auto &DepContainer : ContainersToRemove) { + assert(Deps.count(DepContainer) && "already removed?"); + assert(Deps[DepContainer].empty() && "non empty?"); + Deps.erase(DepContainer); + } + return Changed; + } + }; + +private: class Coalescer { public: std::unique_ptr<SuperNode> addOrCreateSuperNode(ContainerElementsMap Defs, @@ -136,19 +243,17 @@ private: } void coalesce(std::vector<std::unique_ptr<SuperNode>> &SNs, - ElemToSuperNodeMap &ElemToSN) { + ElemToSuperNodeMap &ElemToSN, + bool AbandonOldMapping = false) { for (size_t I = 0; I != SNs.size();) { auto &SN = SNs[I]; assert(!SNHashes.count(SN.get()) && "Elements of SNs should be new to the coalescer"); auto H = getHash(SN->Deps); if (auto *CanonicalSN = findCanonicalSuperNode(H, SN->Deps)) { - for (auto &[Container, Elems] : SN->Defs) { + SN->mapDefsTo(ElemToSN, CanonicalSN, AbandonOldMapping); + for (auto &[Container, Elems] : SN->Defs) CanonicalSN->Defs[Container].insert(Elems.begin(), Elems.end()); - auto &ContainerElemToSN = ElemToSN[Container]; - for (auto &Elem : Elems) - ContainerElemToSN[Elem] = CanonicalSN; - } std::swap(SN, SNs.back()); SNs.pop_back(); } else { @@ -286,16 +391,11 @@ public: static 
SimplifyResult simplify(std::vector<std::unique_ptr<SuperNode>> SNs) { // Build ElemToSN map. ElemToSuperNodeMap ElemToSN; - for (auto &SN : SNs) { - for (auto &[Container, Elements] : SN->Defs) { - auto &ContainerElemToSN = ElemToSN[Container]; - for (auto &E : Elements) - ContainerElemToSN[E] = SN.get(); - } - } + for (auto &SN : SNs) + SN->mapDefsToThis(ElemToSN); SuperNodeDepsMap SuperNodeDeps; - hoistDeps(SuperNodeDeps, SNs, ElemToSN); + hoistDeps(SNs, SuperNodeDeps, ElemToSN); propagateDeps(SuperNodeDeps); // Pre-coalesce nodes. @@ -324,25 +424,13 @@ public: // First process any dependencies on nodes with external state. auto FailedSNs = processExternalDeps(NewSNs, GetExternalState); + SuperNodeDepsMap SuperNodeDeps; + // Collect the PendingSNs whose dep sets are about to be modified. std::vector<std::unique_ptr<SuperNode>> ModifiedPendingSNs; for (size_t I = 0; I != PendingSNs.size();) { auto &SN = PendingSNs[I]; - bool Remove = false; - for (auto &[Container, Elems] : SN->Deps) { - auto I = ElemToNewSN.find(Container); - if (I == ElemToNewSN.end()) - continue; - for (auto Elem : Elems) { - if (I->second.contains(Elem)) { - Remove = true; - break; - } - } - if (Remove) - break; - } - if (Remove) { + if (SN->hoistDeps(SuperNodeDeps, ElemToNewSN)) { ModifiedPendingSNs.push_back(std::move(SN)); std::swap(SN, PendingSNs.back()); PendingSNs.pop_back(); @@ -350,15 +438,11 @@ public: ++I; } - // Remove cycles from the graphs. - SuperNodeDepsMap SuperNodeDeps; - hoistDeps(SuperNodeDeps, ModifiedPendingSNs, ElemToNewSN); - - // If SN's deps are about to be modified then remove it from the coalescer. + // Remove SNs whose deps have been modified from the coalescer. 
for (auto &SN : ModifiedPendingSNs) CoalesceToPendingSNs.erase(SN.get()); - hoistDeps(SuperNodeDeps, NewSNs, ElemToPendingSN); + hoistDeps(NewSNs, SuperNodeDeps, ElemToPendingSN); propagateDeps(SuperNodeDeps); propagateFailures(FailedSNs, SuperNodeDeps); @@ -372,7 +456,8 @@ public: FailedSNs, nullptr); CoalesceToPendingSNs.coalesce(ModifiedPendingSNs, ElemToPendingSN); - CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN); + CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN, + /* AbandonOldMapping = */ true); // Integrate remaining ModifiedPendingSNs and NewSNs into PendingSNs. for (auto &SN : ModifiedPendingSNs) @@ -380,11 +465,7 @@ public: // Update ElemToPendingSN for the remaining elements. for (auto &SN : NewSNs) { - for (auto &[Container, Elems] : SN->Defs) { - auto &Row = ElemToPendingSN[Container]; - for (auto &Elem : Elems) - Row[Elem] = SN.get(); - } + SN->mapDefsToThis(ElemToPendingSN, /* AbandonOldMapping = */ true); PendingSNs.push_back(std::move(SN)); } @@ -514,57 +595,12 @@ public: private: // Replace individual dependencies with supernode dependencies. - static void hoistDeps(SuperNodeDepsMap &SuperNodeDeps, - std::vector<std::unique_ptr<SuperNode>> &SNs, + static void hoistDeps(std::vector<std::unique_ptr<SuperNode>> &SNs, + SuperNodeDepsMap &SuperNodeDeps, ElemToSuperNodeMap &ElemToSN) { // For all SNs... - for (auto &SN : SNs) { - SmallVector<ContainerId> ContainersToRemove; - for (auto &[DepContainer, DepElems] : SN->Deps) { - - // Check ElemToSN to see if any other SuperNodes define elements in - // DepContainer. If not then bail out early. - auto I = ElemToSN.find(DepContainer); - if (I == ElemToSN.end()) - continue; - auto &ContainerElemToSN = I->second; - - // ElemToSN includes SuperNodes that define elements in DepContainer. - // We need to iterate over ContainerElemToSN or DepElems: we pick the - // smaller to minimize the cost. 
- if (ContainerElemToSN.size() < DepElems.size()) { - for (auto &[DefElem, DefSN] : ContainerElemToSN) - if (DepElems.erase(DefElem) && DefSN != SN.get()) - SuperNodeDeps[DefSN].insert(SN.get()); - } else { - SmallVector<ElementId> ElemsToRemove; - for (auto &DepElem : DepElems) { - auto J = ContainerElemToSN.find(DepElem); - if (J == ContainerElemToSN.end()) - continue; - ElemsToRemove.push_back(DepElem); - SuperNode *DefSN = J->second; - if (DefSN != SN.get()) - SuperNodeDeps[DefSN].insert(SN.get()); - } - - for (auto &DepElem : ElemsToRemove) - DepElems.erase(DepElem); - } - - // If DepElems has become empty then add DepContainer to the list of - // containers to remove. - if (DepElems.empty()) - ContainersToRemove.push_back(DepContainer); - } - - // Remove any containers in SN->Deps that have become empty. - for (auto &DepContainer : ContainersToRemove) { - assert(SN->Deps.count(DepContainer) && "DepContainer already removed?"); - assert(SN->Deps[DepContainer].empty() && "DepContainer deps not empty"); - SN->Deps.erase(DepContainer); - } - } + for (auto &SN : SNs) + SN->hoistDeps(SuperNodeDeps, ElemToSN); } // Compute transitive closure of deps for each node. @@ -692,13 +728,8 @@ private: } // Update ElemToSNs (if passed) to remove elements pointing at SN. 
- for (auto *SN : ToRemoveFromElemToSNs) { - for (auto &[Container, Elems] : SN->defs()) { - auto &Row = (*ElemToSNs)[Container]; - for (auto &Elem : Elems) - Row.erase(Elem); - } - } + for (auto *SN : ToRemoveFromElemToSNs) + SN->unmapDefsFromThis(); } std::vector<std::unique_ptr<SuperNode>> PendingSNs; diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 6774a33556c0..e2b2feb92731 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -138,6 +138,18 @@ def int_dx_resource_samplecmplevelzero llvm_float_ty, llvm_any_ty], [IntrReadMem]>; +def int_dx_resource_gather + : DefaultAttrsIntrinsic<[llvm_any_ty], + [llvm_any_ty, llvm_any_ty, llvm_any_ty, + llvm_i32_ty, llvm_any_ty], + [IntrReadMem]>; + +def int_dx_resource_gather_cmp + : DefaultAttrsIntrinsic<[llvm_any_ty], + [llvm_any_ty, llvm_any_ty, llvm_any_ty, + llvm_float_ty, llvm_i32_ty, llvm_any_ty], + [IntrReadMem]>; + // Cast between target extension handle types and dxil-style opaque handles def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 065036cedc2a..8444675b847e 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -264,8 +264,6 @@ unsigned OptTable::internalFindNearest( StringRef Option, std::string &NearestString, unsigned MinimumLength, unsigned MaximumDistance, std::function<bool(const Info &)> ExcludeOption) const { - assert(!Option.empty()); - // Consider each [option prefix + option name] pair as a candidate, finding // the closest match. 
unsigned BestDistance = diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index bc2dcdf9b591..bb0998064a26 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -305,8 +305,16 @@ def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))), (FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>; let True16Predicate = UseRealTrue16Insts in +def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))), + (FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>; + +let True16Predicate = NotUseRealTrue16Insts in +def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 0))), + (V_CVT_PKRTZ_F16_F32_e32 $src0, (IMPLICIT_DEF))>; + +let True16Predicate = UseRealTrue16Insts in def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 0))), - (V_CVT_PKRTZ_F16_F32_e32 $src0_modifiers, $src0)>; + (EXTRACT_SUBREG (V_CVT_PKRTZ_F16_F32_e64 $src0_modifiers, $src0, 0, (IMPLICIT_DEF)), lo16)>; def : GCNPat <(v2f16 (build_vector (f16 (fptrunc_round f32:$src0, (i32 0))), (f16 (fptrunc_round f32:$src1, (i32 0))))), diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 08f3ac44e157..5c4b1f3a4bdc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8859,14 +8859,27 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::SRA: if (Op.getSimpleValueType().isFixedLengthVector()) { if (Subtarget.hasStdExtP()) { - // We have patterns for scalar/immediate shift amount, so no lowering - // needed. - if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR) - return Op; - // There's no vector-vector version of shift instruction in P extension // so we need to unroll to scalar computation and pack them back. 
- return DAG.UnrollVectorOp(Op.getNode()); + if (Op.getOperand(1)->getOpcode() != ISD::SPLAT_VECTOR) + return DAG.UnrollVectorOp(Op.getNode()); + + unsigned Opc; + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case ISD::SHL: + Opc = RISCVISD::PSHL; + break; + case ISD::SRL: + Opc = RISCVISD::PSRL; + break; + case ISD::SRA: + Opc = RISCVISD::PSRA; + break; + } + return DAG.getNode(Opc, SDLoc(Op), Op.getValueType(), Op.getOperand(0), + Op.getOperand(1).getOperand(0)); } return lowerToScalableOp(Op, DAG); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index eb276ef17da7..6917a8576a23 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1381,7 +1381,7 @@ class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType, (Inst GPR:$rs1, ImmType:$imm)>; class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst> : PatGprImm<OpNode, Inst, simm12_lo>; -class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst> +class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIBase Inst> : PatGprImm<OpNode, Inst, uimmlog2xlen>; /// Predicates diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index f82ff91eecdb..2e8e4c9fd816 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1658,6 +1658,13 @@ def riscv_mulhr : RVSDNode<"MULHR", SDTIntBinOp>; def riscv_mulhru : RVSDNode<"MULHRU", SDTIntBinOp>; def riscv_mulhrsu : RVSDNode<"MULHRSU", SDTIntBinOp>; +def STD_RISCVPackedShift : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, XLenVT>]>; +def riscv_pshl : RVSDNode<"PSHL", STD_RISCVPackedShift>; +def riscv_psrl : RVSDNode<"PSRL", STD_RISCVPackedShift>; +def riscv_psra : RVSDNode<"PSRA", STD_RISCVPackedShift>; + // Bitwise merge: res = (~op0 & op1) | (op0 & op2) def SDT_RISCVMERGE : SDTypeProfile<1, 3, [SDTCisInt<0>, 
SDTCisSameAs<0, 1>, @@ -1766,23 +1773,23 @@ let Predicates = [HasStdExtP] in { def: Pat<(XLenVecI16VT (riscv_mulhrsu GPR:$rs1, GPR:$rs2)), (PMULHRSU_H GPR:$rs1, GPR:$rs2)>; // 8-bit logical shift left/right patterns - def: Pat<(XLenVecI8VT (shl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))), + def: Pat<(XLenVecI8VT (riscv_pshl GPR:$rs1, uimm3:$shamt)), (PSLLI_B GPR:$rs1, uimm3:$shamt)>; - def: Pat<(XLenVecI8VT (srl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))), + def: Pat<(XLenVecI8VT (riscv_psrl GPR:$rs1, uimm3:$shamt)), (PSRLI_B GPR:$rs1, uimm3:$shamt)>; // 16-bit logical shift left/right patterns - def: Pat<(XLenVecI16VT (shl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), + def: Pat<(XLenVecI16VT (riscv_pshl GPR:$rs1, uimm4:$shamt)), (PSLLI_H GPR:$rs1, uimm4:$shamt)>; - def: Pat<(XLenVecI16VT (srl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), + def: Pat<(XLenVecI16VT (riscv_psrl GPR:$rs1, uimm4:$shamt)), (PSRLI_H GPR:$rs1, uimm4:$shamt)>; // 8-bit arithmetic shift right patterns - def: Pat<(XLenVecI8VT (sra GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))), + def: Pat<(XLenVecI8VT (riscv_psra GPR:$rs1, uimm3:$shamt)), (PSRAI_B GPR:$rs1, uimm3:$shamt)>; // 16-bit arithmetic shift right patterns - def: Pat<(XLenVecI16VT (sra GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))), + def: Pat<(XLenVecI16VT (riscv_psra GPR:$rs1, uimm4:$shamt)), (PSRAI_H GPR:$rs1, uimm4:$shamt)>; // 16-bit signed saturation shift left patterns @@ -1790,29 +1797,23 @@ let Predicates = [HasStdExtP] in { (PSSLAI_H GPR:$rs1, uimm4:$shamt)>; // 8-bit logical shift left/right - def: Pat<(XLenVecI8VT (shl GPR:$rs1, - (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(XLenVecI8VT (riscv_pshl GPR:$rs1, GPR:$rs2)), (PSLL_BS GPR:$rs1, GPR:$rs2)>; - def: Pat<(XLenVecI8VT (srl GPR:$rs1, - (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(XLenVecI8VT (riscv_psrl GPR:$rs1, GPR:$rs2)), (PSRL_BS GPR:$rs1, GPR:$rs2)>; // 8-bit arithmetic 
shift left/right - def: Pat<(XLenVecI8VT (sra GPR:$rs1, - (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(XLenVecI8VT (riscv_psra GPR:$rs1, GPR:$rs2)), (PSRA_BS GPR:$rs1, GPR:$rs2)>; // 16-bit logical shift left/right - def: Pat<(XLenVecI16VT (shl GPR:$rs1, - (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(XLenVecI16VT (riscv_pshl GPR:$rs1, GPR:$rs2)), (PSLL_HS GPR:$rs1, GPR:$rs2)>; - def: Pat<(XLenVecI16VT (srl GPR:$rs1, - (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(XLenVecI16VT (riscv_psrl GPR:$rs1, GPR:$rs2)), (PSRL_HS GPR:$rs1, GPR:$rs2)>; // 16-bit arithmetic shift left/right - def: Pat<(XLenVecI16VT (sra GPR:$rs1, - (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(XLenVecI16VT (riscv_psra GPR:$rs1, GPR:$rs2)), (PSRA_HS GPR:$rs1, GPR:$rs2)>; // 8-bit PLI SD node pattern @@ -1865,6 +1866,7 @@ let Predicates = [HasStdExtP, IsRV32] in { def : PatGprGpr<uaddsat, SADDU>; def : PatGprGpr<usubsat, SSUBU>; def : PatGprGpr<sshlsat, SSHA>; + def : PatGprUimmLog2XLen<sshlsat, SSLAI>; // Narrowing shift patterns (NSRL/NSRA) // Immediate shift amount patterns @@ -1972,14 +1974,28 @@ let Predicates = [HasStdExtP, IsRV64] in { def: Pat<(v2i32 (mul GPR:$rs1, GPR:$rs2)), (PACK (MUL_W00 GPR:$rs1, GPR:$rs2), (MUL_W11 GPR:$rs1, GPR:$rs2))>; + // 32-bit logical shift left/right patterns + def: Pat<(v2i32 (riscv_pshl GPR:$rs1, uimm5:$shamt)), + (PSLLI_W GPR:$rs1, uimm5:$shamt)>; + def: Pat<(v2i32 (riscv_psrl GPR:$rs1, uimm5:$shamt)), + (PSRLI_W GPR:$rs1, uimm5:$shamt)>; + + // 32-bit arithmetic shift left/right patterns + def: Pat<(v2i32 (riscv_psra GPR:$rs1, uimm5:$shamt)), + (PSRAI_W GPR:$rs1, uimm5:$shamt)>; + + // 32-bit signed saturation shift left patterns + def: Pat<(v2i32 (sshlsat GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), + (PSSLAI_W GPR:$rs1, uimm5:$shamt)>; + // 32-bit logical shift left/right - def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(v2i32 (riscv_pshl 
GPR:$rs1, GPR:$rs2)), (PSLL_WS GPR:$rs1, GPR:$rs2)>; - def: Pat<(v2i32 (srl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(v2i32 (riscv_psrl GPR:$rs1, GPR:$rs2)), (PSRL_WS GPR:$rs1, GPR:$rs2)>; // 32-bit arithmetic shift left/right - def: Pat<(v2i32 (sra GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))), + def: Pat<(v2i32 (riscv_psra GPR:$rs1, GPR:$rs2)), (PSRA_WS GPR:$rs1, GPR:$rs2)>; // splat pattern @@ -2006,20 +2022,6 @@ let Predicates = [HasStdExtP, IsRV64] in { def: Pat<(v2i32 (smax GPR:$rs1, GPR:$rs2)), (PMAX_W GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (umax GPR:$rs1, GPR:$rs2)), (PMAXU_W GPR:$rs1, GPR:$rs2)>; - // 32-bit logical shift left/right patterns - def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), - (PSLLI_W GPR:$rs1, uimm5:$shamt)>; - def: Pat<(v2i32 (srl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), - (PSRLI_W GPR:$rs1, uimm5:$shamt)>; - - // 32-bit arithmetic shift left/right patterns - def: Pat<(v2i32 (sra GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), - (PSRAI_W GPR:$rs1, uimm5:$shamt)>; - - // 32-bit signed saturation shift left patterns - def: Pat<(v2i32 (sshlsat GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))), - (PSSLAI_W GPR:$rs1, uimm5:$shamt)>; - // 32-bit vselect patterns def: Pat<(v2i32 (vselect (v2i32 GPR:$mask), GPR:$true_v, GPR:$false_v)), (MERGE GPR:$mask, GPR:$false_v, GPR:$true_v)>; diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index 01fe45a013a6..63e897e076ed 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -545,15 +545,13 @@ public: collectFusionCandidates(LV); Changed |= fuseCandidates(); + // All loops in the candidate sets have a common parent (or no parent). + // Next loop vector will correspond to a different parent. It is safe + // to remove all the candidates currently in the set. FusionCandidates.clear(); } - // Finished analyzing candidates at this level. 
- // Descend to the next level and clear all of the candidates currently - // collected. Note that it will not be possible to fuse any of the - // existing candidates with new candidates because the new candidates will - // be at a different nest level and thus not be control flow equivalent - // with all of the candidates collected so far. + // Finished analyzing candidates at this level. Descend to the next level. LLVM_DEBUG(dbgs() << "Descend one level!\n"); LDT.descend(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index a4a0e5d9a8b4..837a9b329514 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -1817,10 +1817,12 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan, bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred); if (IsStrictPredicate) { - return handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR, - cast<VPWidenIntOrFpInductionRecipe>(IVOp), - MinOrMaxResult, FindIVSelect, FindIVCmp, - FindIVRdxResult); + if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR, + cast<VPWidenIntOrFpInductionRecipe>(IVOp), + MinOrMaxResult, FindIVSelect, FindIVCmp, + FindIVRdxResult)) + return false; + continue; } // The reduction using MinOrMaxPhiR needs adjusting to compute the correct diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll index 2e33fcac9536..2a5a8f5e068d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll @@ -1,44 +1,68 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=CHECK,SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 
-mattr=+real-true16 < %s | FileCheck -check-prefixes=CHECK,GFX11-SDAG %s +; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s ; XUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s ; FIXME. gisel for fptrunc_round rtz define amdgpu_gs half @v_fptrunc_round_f32_to_f16_tonearest(float %a) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_tonearest: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_tonearest: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_tonearest: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearest") ret half %res } define amdgpu_gs half @v_fptrunc_round_f32_to_f16_upward(float %a) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward") ret half %res } define amdgpu_gs half @v_fptrunc_round_f32_to_f16_downward(float %a) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward: -; CHECK: ; %bb.0: -; CHECK-NEXT: 
s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward") ret half %res } define amdgpu_gs half @v_fptrunc_round_f32_to_f16_towardzero(float %a) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_towardzero: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, s0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero") ret half %res } @@ -49,6 +73,10 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_f32_to_v2f16_towardzero(float %a, f ; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_v2f16_towardzero: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_f32_to_v2f16_towardzero: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 @@ -63,10 +91,15 @@ define amdgpu_gs <2 x half> 
@v_fptrunc_round_f32_to_v2f16_towardzero(float %a, f } define amdgpu_gs <2 x half> @v_fptrunc_round_poison_to_v2f16_towardzero(float %a) { -; CHECK-LABEL: v_fptrunc_round_poison_to_v2f16_towardzero: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: v_fptrunc_round_poison_to_v2f16_towardzero: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: v_fptrunc_round_poison_to_v2f16_towardzero: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, s0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %lo = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero") %tmp = insertelement <2 x half> poison, half %lo, i32 0 ret <2 x half> %tmp @@ -80,6 +113,11 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_constant_to_v2f16_towardzero(float ; SDAG-NEXT: v_perm_b32 v0, s0, v0, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_constant_to_v2f16_towardzero: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, s0 +; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x3c00 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_constant_to_v2f16_towardzero: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 @@ -92,18 +130,32 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_constant_to_v2f16_towardzero(float } define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v1 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 3, 1), 0 -; CHECK-NEXT: v_add_f16_e32 v0, v0, v4 -; CHECK-NEXT: v_add_f16_e32 v0, v1, v0 -; CHECK-NEXT: global_store_short v[2:3], v0, off -; CHECK-NEXT: s_endpgm +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; SDAG-NEXT: v_add_f16_e32 v0, v0, v4 +; SDAG-NEXT: v_add_f16_e32 v0, v1, v0 +; SDAG-NEXT: global_store_short v[2:3], v0, off +; SDAG-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.h, v1 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v1 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h +; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v1.l, v0.l +; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-SDAG-NEXT: s_endpgm %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward") %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward") %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward") @@ -114,18 +166,32 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a } define amdgpu_gs void @v_fptrunc_round_f32_to_f16_downward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls: -; CHECK: ; %bb.0: -; 
CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v0 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 -; CHECK-NEXT: v_add_f16_e32 v0, v4, v0 -; CHECK-NEXT: v_add_f16_e32 v0, v1, v0 -; CHECK-NEXT: global_store_short v[2:3], v0, off -; CHECK-NEXT: s_endpgm +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v0 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; SDAG-NEXT: v_add_f16_e32 v0, v4, v0 +; SDAG-NEXT: v_add_f16_e32 v0, v1, v0 +; SDAG-NEXT: global_store_short v[2:3], v0, off +; SDAG-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v4.l, v0 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.h, v1 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v4.l, v0.l +; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l +; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-SDAG-NEXT: s_endpgm %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward") %res2 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward") %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward") @@ -136,17 +202,30 @@ define amdgpu_gs 
void @v_fptrunc_round_f32_to_f16_downward_multiple_calls(float } define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(float %a, float %b, ptr addrspace(1) %out) { -; CHECK-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 -; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v4, v1, v0 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; CHECK-NEXT: v_add_f16_e32 v0, v0, v4 -; CHECK-NEXT: v_add_f16_e32 v0, v1, v0 -; CHECK-NEXT: global_store_short v[2:3], v0, off -; CHECK-NEXT: s_endpgm +; SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 +; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v4, v1, v0 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; SDAG-NEXT: v_add_f16_e32 v0, v0, v4 +; SDAG-NEXT: v_add_f16_e32 v0, v1, v0 +; SDAG-NEXT: global_store_short v[2:3], v0, off +; SDAG-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v4, v0, s0 +; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v5, v1, s0 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v1 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_f16_e32 v0.h, v4.l, v5.l +; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h +; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-SDAG-NEXT: s_endpgm %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero") %res2 = call half @llvm.fptrunc.round.f16.f32(float 
%b, metadata !"round.towardzero") %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward") @@ -157,14 +236,23 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(floa } define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) { -; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_mov_b32_e32 v0, s0 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward") %bitcast = bitcast half %res to i16 %ret = zext i16 %bitcast to i32 @@ -172,14 +260,23 @@ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addr } define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) { -; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; 
CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: s_fptrunc_round_f32_to_f16_downward: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_mov_b32_e32 v0, s0 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: s_fptrunc_round_f32_to_f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward") %bitcast = bitcast half %res to i16 %ret = zext i16 %bitcast to i32 @@ -187,20 +284,34 @@ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr ad } define amdgpu_gs void @s_fptrunc_round_f32_to_f16_upward_multiple_calls(float inreg %a, float inreg %b, ptr addrspace(1) %out) { -; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v3 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 -; CHECK-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 -; CHECK-NEXT: v_add_f16_e32 v2, v2, v4 -; CHECK-NEXT: v_add_f16_e32 v2, v3, v2 -; CHECK-NEXT: global_store_short v[0:1], v2, off -; CHECK-NEXT: s_endpgm +; SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls: +; SDAG: ; %bb.0: +; SDAG-NEXT: 
v_mov_b32_e32 v2, s0 +; SDAG-NEXT: v_mov_b32_e32 v3, s1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v3 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; SDAG-NEXT: v_add_f16_e32 v2, v2, v4 +; SDAG-NEXT: v_add_f16_e32 v2, v3, v2 +; SDAG-NEXT: global_store_short v[0:1], v2, off +; SDAG-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.l, s0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.h, s1 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, s1 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_f16_e32 v2.l, v2.l, v2.h +; GFX11-SDAG-NEXT: v_add_f16_e32 v2.l, v3.l, v2.l +; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-SDAG-NEXT: s_endpgm %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward") %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward") %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward") @@ -219,6 +330,14 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> % ; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 
v1 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -239,6 +358,14 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> ; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_downward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 @@ -270,6 +397,22 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x ; SDAG-NEXT: global_store_dword v[4:5], v0, off ; SDAG-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.h, v3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v2 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; GFX11-SDAG-NEXT: v_pk_add_f16 v0, v1, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX11-SDAG-NEXT: global_store_b32 v[4:5], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls: ; GISEL: ; %bb.0: ; GISEL-NEXT: 
s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -298,18 +441,31 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x } define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> inreg %a, ptr addrspace(1) %out) { -; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: v_mov_b32_e32 v1, s1 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_readfirstlane_b32 s1, v1 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_mov_b32_e32 v0, s0 +; SDAG-NEXT: v_mov_b32_e32 v1, s1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, s1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-SDAG-NEXT: ; return to shader part epilog %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x 
float> %a, metadata !"round.upward") %bitcast = bitcast <2 x half> %res to <2 x i16> %ret = zext <2 x i16> %bitcast to <2 x i32> @@ -317,18 +473,31 @@ define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> in } define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> inreg %a, ptr addrspace(1) %out) { -; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward: -; CHECK: ; %bb.0: -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: v_mov_b32_e32 v1, s1 -; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_readfirstlane_b32 s1, v1 -; CHECK-NEXT: ; return to shader part epilog +; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward: +; SDAG: ; %bb.0: +; SDAG-NEXT: v_mov_b32_e32 v0, s0 +; SDAG-NEXT: v_mov_b32_e32 v1, s1 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; SDAG-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, s1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-SDAG-NEXT: ; 
return to shader part epilog %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward") %bitcast = bitcast <2 x half> %res to <2 x i16> %ret = zext <2 x i16> %bitcast to <2 x i32> @@ -362,6 +531,22 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x ; SDAG-NEXT: global_store_dword v[0:1], v2, off ; SDAG-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.h, s1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.l, s0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, s3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, s2 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v4.h, s3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v4.l, s2 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0 +; GFX11-SDAG-NEXT: v_pk_add_f16 v2, v2, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_add_f16 v2, v4, v2 +; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-SDAG-NEXT: s_endpgm ; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -403,6 +588,15 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> % ; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v3 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward: ; 
GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -425,6 +619,15 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> ; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v3 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 @@ -449,6 +652,16 @@ define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> % ; SDAG-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -475,6 +688,16 @@ define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_downward(<4 x float> ; SDAG-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 3, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v4f32_to_v4f16_downward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 @@ -507,6 +730,21 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_upward(<8 x float> % ; SDAG-NEXT: v_perm_b32 v3, v7, v6, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v8f32_to_v8f16_upward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.h, v7 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.h, v5 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.l, v4 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.l, v6 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v7 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v8f32_to_v8f16_upward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 @@ -545,6 +783,21 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float> ; SDAG-NEXT: v_perm_b32 v3, v7, v6, 0x5040100 ; SDAG-NEXT: ; return to shader part epilog ; +; GFX11-SDAG-LABEL: v_fptrunc_round_v8f32_to_v8f16_downward: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.h, v7 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.h, v5 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.l, v4 +; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.l, v6 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v7 +; GFX11-SDAG-NEXT: ; return to shader part epilog ; GISEL-LABEL: v_fptrunc_round_v8f32_to_v8f16_downward: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll index 2369bbb4bfe4..cc00f427126b 100644 --- a/llvm/test/CodeGen/RISCV/rv32p.ll +++ b/llvm/test/CodeGen/RISCV/rv32p.ll @@ -584,6 +584,37 @@ define i32 @shlsat_i32(i32 %a, i32 %b) { ret i32 %sshlsat } +define i8 @shlsati_i8(i8 %a) { +; CHECK-LABEL: shlsati_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 24 +; CHECK-NEXT: sslai a0, a0, 5 +; CHECK-NEXT: srai a0, a0, 24 +; CHECK-NEXT: ret + %sshlsat = tail call i8 @llvm.sshl.sat.i8(i8 %a, i8 5) + ret i8 %sshlsat +} + +define i16 @shlsati_i16(i16 %a) { +; CHECK-LABEL: shlsati_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 16 +; CHECK-NEXT: sslai a0, a0, 10 +; CHECK-NEXT: srai a0, a0, 16 +; CHECK-NEXT: ret + %sshlsat = tail call i16 @llvm.sshl.sat.i16(i16 %a, i16 10) + ret i16 %sshlsat +} + +define i32 @shlsati_i32(i32 %a) { +; CHECK-LABEL: shlsati_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: sslai a0, a0, 21 +; CHECK-NEXT: ret + %sshlsat = tail call i32 @llvm.sshl.sat.i32(i32 %a, i32 21) + ret i32 %sshlsat +} + define i8 @sadd_i8(i8 %x, i8 %y) { ; CHECK-LABEL: sadd_i8: ; CHECK: # %bb.0: diff --git a/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll 
b/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll index ee7928963332..02e4bb4e5c2a 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 ; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s -; FIXME: Some loops in the file are currently mis-compiled. ; Tests for https://github.com/llvm/llvm-project/issues/184729. define i64 @argmin_argmax(ptr %data, i32 %start_val) { ; CHECK-LABEL: define i64 @argmin_argmax( @@ -15,11 +14,11 @@ define i64 @argmin_argmax(ptr %data, i32 %start_val) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, 
ptr [[DATA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 @@ -31,7 +30,7 @@ define i64 @argmin_argmax(ptr %data, i32 %start_val) { ; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI3]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw <2 x i64> [[VEC_IND1]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw nsw <2 x i64> [[VEC_IND2]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw <2 x i64> [[VEC_IND2]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -45,9 +44,14 @@ define i64 @argmin_argmax(ptr %data, i32 %start_val) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL]] ; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]]) -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT8]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT9]] +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1) +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]]) +; CHECK-NEXT: [[DERIVED_IV_RESULT10:%.*]] = add i64 1, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL]] +; CHECK-NEXT: 
[[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT10]] ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -114,11 +118,11 @@ define i64 @argmin_argmin(ptr %data, i32 %start_val1, i32 %start_val2) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 @@ -130,7 +134,7 @@ define i64 @argmin_argmin(ptr %data, i32 %start_val1, i32 %start_val2) { ; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw <2 x i64> [[VEC_IND3]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw nsw <2 x i64> 
[[VEC_IND4]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw <2 x i64> [[VEC_IND4]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -144,9 +148,14 @@ define i64 @argmin_argmin(ptr %data, i32 %start_val1, i32 %start_val2) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL1]] ; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]]) -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT10]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1) +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]]) +; CHECK-NEXT: [[DERIVED_IV_RESULT12:%.*]] = add i64 1, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL2]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT12]] ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -213,11 +222,11 @@ define i64 @argmax_argmax(ptr %data, i32 %start_val1, i32 %start_val2) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i64> [ <i64 
0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 @@ -229,7 +238,7 @@ define i64 @argmax_argmax(ptr %data, i32 %start_val1, i32 %start_val2) { ; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw <2 x i64> [[VEC_IND3]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw nsw <2 x i64> [[VEC_IND4]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw <2 x i64> [[VEC_IND4]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -243,9 +252,14 @@ define i64 @argmax_argmax(ptr %data, i32 %start_val1, i32 %start_val2) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 
[[START_VAL1]] ; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]]) -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT10]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1) +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]]) +; CHECK-NEXT: [[DERIVED_IV_RESULT12:%.*]] = add i64 1, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL2]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT12]] ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -313,11 +327,11 @@ define i64 @argmin_signed_argmax_unsigned(ptr %data, i32 %start_val1, i32 %start ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], 
%[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 @@ -329,7 +343,7 @@ define i64 @argmin_signed_argmax_unsigned(ptr %data, i32 %start_val1, i32 %start ; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw <2 x i64> [[VEC_IND3]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw nsw <2 x i64> [[VEC_IND4]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw <2 x i64> [[VEC_IND4]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -343,9 +357,14 @@ define i64 @argmin_signed_argmax_unsigned(ptr %data, i32 %start_val1, i32 %start ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL1]] ; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]]) -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 
[[TMP15]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT10]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1) +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]]) +; CHECK-NEXT: [[DERIVED_IV_RESULT12:%.*]] = add i64 1, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL2]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT12]] ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -410,11 +429,11 @@ define i64 @argmin_argmax_unsigned(ptr %data, i32 %start_val) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ 
poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 @@ -426,7 +445,7 @@ define i64 @argmin_argmax_unsigned(ptr %data, i32 %start_val) { ; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI3]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw <2 x i64> [[VEC_IND1]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw nsw <2 x i64> [[VEC_IND2]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw <2 x i64> [[VEC_IND2]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -440,9 +459,14 @@ define i64 @argmin_argmax_unsigned(ptr %data, i32 %start_val) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL]] ; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]]) -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT8]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT9]] +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1) +; CHECK-NEXT: [[TMP17:%.*]] = call i64 
@llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]]) +; CHECK-NEXT: [[DERIVED_IV_RESULT10:%.*]] = add i64 1, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT10]] ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -498,57 +522,14 @@ exit: define i64 @argmin_second_not_argmax(ptr %data, i32 %start_val) { ; CHECK-LABEL: define i64 @argmin_second_not_argmax( ; CHECK-SAME: ptr [[DATA:%.*]], i32 [[START_VAL:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[START_VAL]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP2]] = select <2 x i1> [[TMP1]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI2]] -; CHECK-NEXT: [[TMP3]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI]]) -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> splat (i32 10), [[VEC_PHI3]] -; CHECK-NEXT: [[TMP5]] = select <2 x i1> [[TMP4]], <2 x i64> [[VEC_IND1]], <2 x i64> [[VEC_PHI4]] -; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI3]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT6]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i32> [[TMP3]], [[BROADCAST_SPLAT7]] -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1) -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP10]]) -; CHECK-NEXT: [[DERIVED_IV_RESULT:%.*]] = add i64 1, [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL]] -; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]] -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 
@llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]]) -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: br label %[[SCALAR_PH:.*]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MINVAL:%.*]] = phi i32 [ [[TMP8]], %[[SCALAR_PH]] ], [ [[NEW_MINVAL:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MINPOS:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[NEW_MINPOS:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAXVAL:%.*]] = phi i32 [ [[TMP14]], %[[SCALAR_PH]] ], [ [[NEW_MAXVAL:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAXPOS:%.*]] = phi i64 [ [[TMP17]], %[[SCALAR_PH]] ], [ [[NEW_MAXPOS:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MINVAL:%.*]] = phi i32 [ [[START_VAL]], %[[SCALAR_PH]] ], [ [[NEW_MINVAL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MINPOS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[NEW_MINPOS:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAXVAL:%.*]] = phi i32 [ [[START_VAL]], %[[SCALAR_PH]] ], [ [[NEW_MAXVAL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAXPOS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[NEW_MAXPOS:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP_MIN:%.*]] = icmp slt i32 [[VAL]], [[MINVAL]] @@ -559,7 +540,7 @@ define i64 @argmin_second_not_argmax(ptr %data, i32 %start_val) { ; CHECK-NEXT: [[NEW_MAXVAL]] = call i32 @llvm.smax.i32(i32 [[VAL]], i32 [[MAXVAL]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT_COND]], 
label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[NEW_MINPOS_LCSSA:%.*]] = phi i64 [ [[NEW_MINPOS]], %[[LOOP]] ] ; CHECK-NEXT: [[NEW_MAXPOS_LCSSA:%.*]] = phi i64 [ [[NEW_MAXPOS]], %[[LOOP]] ] diff --git a/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn index eded7696e9e8..2e2415cd441e 100644 --- a/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn @@ -1,8 +1,8 @@ group("unittests") { deps = [ "Core:CoreTests", - "Profile:ProfileTests", "Passes:PassTests", + "Profile:ProfileTests", ] testonly = true } diff --git a/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn index 3f022ed7f748..f3bd07f61416 100644 --- a/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn @@ -10,7 +10,7 @@ static_library("Options") { ] public_deps = [ # public_dep because public header Options.h includes generated Options.inc. 
- "//clang/include/clang/Options:Options", + "//clang/include/clang/Options", ] sources = [ "DriverOptions.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn index b6818a2a5d02..f6e4632cc7b5 100644 --- a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn @@ -22,9 +22,9 @@ static_library("Sema") { "//clang/include/clang/Basic:riscv_andes_vector_builtin_sema", "//clang/include/clang/Basic:riscv_sifive_vector_builtin_sema", "//clang/include/clang/Basic:riscv_vector_builtin_sema", + "//clang/include/clang/Sema:AttrIsTypeDependent", "//clang/include/clang/Sema:AttrParsedAttrImpl", "//clang/include/clang/Sema:AttrParsedAttrKinds", - "//clang/include/clang/Sema:AttrIsTypeDependent", "//clang/include/clang/Sema:AttrSpellingListIndex", "//clang/include/clang/Sema:AttrTemplateInstantiate", "//clang/lib/APINotes", diff --git a/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn index 653062d108ce..e5f491ba8fa4 100644 --- a/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn @@ -2,7 +2,7 @@ static_library("Tooling") { output_name = "clangTooling" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ - "//clang/include/clang/Options:Options", + "//clang/include/clang/Options", "//clang/lib/AST", "//clang/lib/ASTMatchers", "//clang/lib/Basic", diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index 483d56be802a..ab8ea1ffb56b 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -7,10 +7,10 @@ if (current_toolchain == host_toolchain) { } } else { asan_sources = [ - "asan_aix.cpp", "asan_activation.cpp", "asan_activation.h", "asan_activation_flags.inc", + "asan_aix.cpp", "asan_allocator.cpp", 
"asan_allocator.h", "asan_debugging.cpp", diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 1bd812560b2c..5815de177bb7 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -94,13 +94,13 @@ source_set("_unused") { # Thumb1 "arm/addsf3.S", "arm/comparesf2.S", + "arm/divsf3.S", "arm/divsi3.S", - "arm/udivsi3.S", "arm/fnan2.c", "arm/fnorm2.c", "arm/funder.c", "arm/mulsf3.S", - "arm/divsf3.S", "arm/thumb1/mulsf3.S", + "arm/udivsi3.S", ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn index b501d7df1468..297ccc381a4e 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn @@ -26,7 +26,7 @@ write_cmake_config("lit_site_cfg") { values += [ "ASAN_TEST_APPLE_TARGET_IS_HOST_PYBOOL=1", "ASAN_TEST_DYNAMIC=1", - ] + ] } else { values += [ "ASAN_TEST_APPLE_TARGET_IS_HOST_PYBOOL=0", diff --git a/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn index 838846e31d92..41dfc1fdb0ba 100644 --- a/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn @@ -76,8 +76,8 @@ static_library("Host") { "windows/MainLoopWindows.cpp", "windows/PipeWindows.cpp", "windows/ProcessLauncherWindows.cpp", - "windows/PseudoConsole.cpp", "windows/ProcessRunLock.cpp", + "windows/PseudoConsole.cpp", ] } else { sources += [ diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn index e82fe2d11b75..3a151bbe0fe2 100644 --- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn @@ -142,7 +142,7 @@ write_lit_cfg("lit_shell_site_cfg") { "LLDB_TOOL_LLDB_SERVER_BUILD=1", "LLDB_TOOLS_DIR=" + 
rebase_path("$root_out_dir/bin"), "LLDB_USE_SYSTEM_DEBUGSERVER=1", # XXX port //lldb/tools/debugserver (?) - "LLVM_ENABLE_DIA_SDK=0", # FIXME: option? just enable on windows? + "LLVM_ENABLE_DIA_SDK=0", # FIXME: option? just enable on windows? "LLVM_HOST_TRIPLE=$llvm_current_triple", "LLVM_USE_SANITIZER=", "Python3_EXECUTABLE=$python_path", diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 9fe4f3e4149b..4c237ddfed39 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -4,6 +4,7 @@ static_library("Analysis") { # Must be a public_dep because Analysis's headers include # TargetLibraryInfo.inc. "//llvm/include/llvm/Analysis:TargetLibraryInfo", + # Must be a public_dep because Analysis's headers include llvm-config.h. "//llvm/include/llvm/Config:llvm-config", ] diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn index 3f6de22922a7..acd3fbd176a8 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn @@ -49,8 +49,8 @@ static_library("LLVMBPFCodeGen") { ":BPFGenFastISel", ":BPFGenGlobalISel", ":BPFGenMCPseudoLowering", - ":BPFGenSDNodeInfo", ":BPFGenRegisterBank", + ":BPFGenSDNodeInfo", "MCTargetDesc", "TargetInfo", "//llvm/include/llvm/Config:llvm-config", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn index 186d2ef96c19..35b069e823c2 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn @@ -28,8 +28,8 @@ static_library("Utils") { "CountVisits.cpp", "CtorUtils.cpp", "DXILUpgrade.cpp", - "Debugify.cpp", "DebugSSAUpdater.cpp", + "Debugify.cpp", "DeclareRuntimeLibcalls.cpp", "DemoteRegToStack.cpp", "EntryExitInstrumenter.cpp", diff 
--git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn index 2363dad4bd34..454b2ed1f1d1 100644 --- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn @@ -121,5 +121,6 @@ write_cmake_config("llvm-lit") { "Python3_EXECUTABLE=/usr/bin/env $python_path", "BUILD_MODE=.", "LLVM_LIT_CONFIG_MAP=" + config_map, + "LLVM_WINDOWS_PREFER_FORWARD_SLASH=", ] } diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index ab8557406968..27d3d0d9b6e7 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -168,8 +168,7 @@ class Arith_IntBinaryOpWithOverflowFlags<string mnemonic, list<Trait> traits = [ class Arith_IntBinaryOpWithExactFlag<string mnemonic, list<Trait> traits = []> : Arith_BinaryOp<mnemonic, traits # - [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, - DeclareOpInterfaceMethods<ArithExactFlagInterface>]>, + [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>]>, Arguments<(ins Arith_SignlessIntegerOrIndexLike:$lhs, Arith_SignlessIntegerOrIndexLike:$rhs, UnitAttr:$isExact)>, @@ -1588,34 +1587,15 @@ def IndexCastTypeConstraint : TypeConstraint<Or<[ def Arith_IndexCastOp : Arith_CastOp<"index_cast", IndexCastTypeConstraint, IndexCastTypeConstraint, - [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, - DeclareOpInterfaceMethods<ArithExactFlagInterface>]> { + [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>]> { let summary = "cast between index and integer types"; let description = [{ Casts between scalar or vector integers and corresponding 'index' scalar or vectors. Index is an integer of platform-specific bit width. If casting to a wider integer, the value is sign-extended. If casting to a narrower integer, the value is truncated. 
- - If the `exact` attribute is present, it is assumed that the operand - contains a value that fits in the destination's representation, therefore - the cast does not lose any information. When this assumption is violated, - the result is poison. - - Example: - - ```mlir - %0 = arith.index_cast %a : index to i64 - %1 = arith.index_cast %a exact : index to i64 - %2 = arith.index_cast %b exact : i32 to index - ``` }]; - let arguments = (ins IndexCastTypeConstraint:$in, UnitAttr:$isExact); - let results = (outs IndexCastTypeConstraint:$out); - let assemblyFormat = [{ - $in (`exact` $isExact^)? attr-dict `:` type($in) `to` type($out) - }]; let hasFolder = 1; let hasCanonicalizer = 1; } @@ -1627,8 +1607,7 @@ def Arith_IndexCastOp def Arith_IndexCastUIOp : Arith_CastOp<"index_castui", IndexCastTypeConstraint, IndexCastTypeConstraint, [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, - DeclareOpInterfaceMethods<ArithNonNegFlagInterface>, - DeclareOpInterfaceMethods<ArithExactFlagInterface>]> { + DeclareOpInterfaceMethods<ArithNonNegFlagInterface>]> { let summary = "unsigned cast between index and integer types"; let description = [{ Casts between scalar or vector integers and corresponding 'index' scalar or @@ -1641,27 +1620,19 @@ def Arith_IndexCastUIOp is equivalent to sign extension. When this assumption is violated, the result is poison. - If the `exact` attribute is present, it is assumed that the operand - contains a value that fits in the destination's representation, therefore - the cast does not lose any information. When this assumption is violated, - the result is poison. 
- Example: ```mlir %0 = arith.index_castui %a : i32 to index %1 = arith.index_castui %a nneg : i32 to index %2 = arith.index_castui %b nneg : index to i64 - %3 = arith.index_castui %a nneg exact : i64 to index ``` }]; - let arguments = (ins IndexCastTypeConstraint:$in, UnitAttr:$nonNeg, - UnitAttr:$isExact); + let arguments = (ins IndexCastTypeConstraint:$in, UnitAttr:$nonNeg); let results = (outs IndexCastTypeConstraint:$out); let assemblyFormat = [{ - $in oilist(`exact` $isExact | `nneg` $nonNeg) attr-dict - `:` type($in) `to` type($out) + $in (`nneg` $nonNeg^)? attr-dict `:` type($in) `to` type($out) }]; let hasFolder = 1; let hasCanonicalizer = 1; diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td index e8287ac2d6bc..d1b8e250cdb5 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td @@ -153,53 +153,6 @@ def ArithNonNegFlagInterface : OpInterface<"ArithNonNegFlagInterface"> { ]; } -def ArithExactFlagInterface : OpInterface<"ArithExactFlagInterface"> { - let description = [{ - Access to op exact flag. 
- }]; - - let cppNamespace = "::mlir::arith"; - - let methods = [ - InterfaceMethod< - /*desc=*/ "Returns whether the operation has the exact flag set", - /*returnType=*/ "bool", - /*methodName=*/ "getExact", - /*args=*/ (ins), - /*methodBody=*/ [{}], - /*defaultImpl=*/ [{ - auto op = cast<ConcreteOp>(this->getOperation()); - return op.getIsExactAttr() != nullptr; - }] - >, - InterfaceMethod< - /*desc=*/ "Set the exact flag for the operation", - /*returnType=*/ "void", - /*methodName=*/ "setExact", - /*args=*/ (ins "bool":$isExact), - /*methodBody=*/ [{}], - /*defaultImpl=*/ [{ - auto op = cast<ConcreteOp>(this->getOperation()); - if (isExact) - op.setIsExactAttr(UnitAttr::get(op->getContext())); - else - op.removeIsExactAttr(); - }] - >, - StaticInterfaceMethod< - /*desc=*/ [{Returns the name of the exact flag attribute for - the operation}], - /*returnType=*/ "StringRef", - /*methodName=*/ "getExactFlagAttrName", - /*args=*/ (ins), - /*methodBody=*/ [{}], - /*defaultImpl=*/ [{ - return "isExact"; - }] - > - ]; -} - def ArithRoundingModeInterface : OpInterface<"ArithRoundingModeInterface"> { let description = [{ Access to op rounding mode. 
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td index f179cfd752c6..ebb0e6132fee 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td @@ -210,4 +210,157 @@ def OpenACC_FirstprivateMapInitialOp let extraClassDeclaration = extraClassDeclarationBase; } +//===----------------------------------------------------------------------===// +// acc.par_width +//===----------------------------------------------------------------------===// + +def OpenACC_ParWidthOp + : OpenACC_Op<"par_width", [NoMemoryEffect, AlwaysSpeculatable]> { + let summary = "Specify parallel width for a GPU dimension"; + let description = [{ + The `acc.par_width` operation specifies the parallel width for a + given GPU parallel dimension. It is used as an input to + `acc.compute_region` to define the launch configuration. + + The optional `launchArg` operand provides a known width value. When + absent, the width is unknown and must be determined later (either at + compile time by analysis or at runtime). + + Examples: + + ```mlir + // Known width from SSA value + %w1 = acc.par_width %vector_len {par_dim = #acc.par_dim<thread_x>} + + // Unknown width (to be computed later) + %w2 = acc.par_width {par_dim = #acc.par_dim<block_x>} + ``` + }]; + let arguments = (ins Optional<Index>:$launchArg, + OpenACC_GPUParallelDimAttr:$par_dim); + let results = (outs OpenACC_ParWidthType:$output); + let assemblyFormat = [{ + ($launchArg^)? attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// acc.compute_region +//===----------------------------------------------------------------------===// + +// Local type constraint for gpu::AsyncTokenType. 
+def OpenACC_GPUAsyncTokenType : Type< + CPred<"::llvm::isa<::mlir::gpu::AsyncTokenType>($_self)">, + "GPU async token type">; + +def OpenACC_ComputeRegionOp + : OpenACC_Op<"compute_region", + [OffloadRegionOpInterface, AffineScope, + RecursiveMemoryEffects, + SingleBlockImplicitTerminator<"YieldOp">, + IsolatedFromAbove, AttrSizedOperandSegments]> { + let summary = "Compute region for GPU execution"; + let description = [{ + The `acc.compute_region` operation wraps a region of code that will be + compiled and executed on a GPU. It is typically produced by lowering + OpenACC compute constructs (`acc.parallel`, `acc.kernels`, `acc.serial`) + but can also be targeted directly by other frontends or lowered from + other constructs that benefit from the automatic parallelization and data + mapping facilities that the `acc` dialect provides. It serves as the + bridge between the high-level representation and the `gpu.launch` + operation. + + The operation is `IsolatedFromAbove`: all values used inside the + region must be explicitly captured. Values are captured in two ways: + + - Launch arguments (`launch`): Results of operations that define + the parallel launch configuration. These are `!acc.par_width`-typed + and become block arguments representing the parallel width for each + dimension. + + - Input arguments (`ins`): Arbitrary values captured from outside + the region (data pointers, scalars, etc.). These become block + arguments with their original types. + + The `origin` attribute records which construct produced this compute + region (e.g., `"acc.parallel"`, `"acc.kernels"`). This is intended to + be solely informational. 
+ + Example: + + ```mlir + %w0 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>} + %w1 = acc.par_width %c8 {par_dim = #acc.par_dim<block_x>} + acc.compute_region launch(%arg0 = %w0, %arg1 = %w1) + ins(%arg2 = %data) : (memref<1024xf32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c1024 = arith.constant 1024 : index + scf.parallel (%iv) = (%c0) to (%c1024) step (%c1) { + %v = memref.load %arg2[%iv] : memref<1024xf32> + scf.reduce + } {acc.par_dims = #acc<par_dims[thread_x]>} + acc.yield + } {origin = "acc.parallel"} + ``` + }]; + + let arguments = (ins Variadic<OpenACC_ParWidthType>:$launchArgs, + Variadic<AnyType>:$inputArgs, + Optional<OpenACC_GPUAsyncTokenType>:$stream, + StrAttr:$origin, + OptionalAttr<FlatSymbolRefAttr>:$kernel_func_name, + OptionalAttr<FlatSymbolRefAttr>:$kernel_module_name); + + let results = (outs Variadic<AnyType>:$results); + + let regions = (region AnyRegion:$region); + + let extraClassDeclaration = [{ + /// Look up the par_width op for the given dimension among launch args. + std::optional<mlir::Value> getLaunchArg( + ::mlir::acc::GPUParallelDimAttr parDim); + + /// Get the known (non-empty) launch value for a dimension. + std::optional<mlir::Value> getKnownLaunchArg( + ::mlir::acc::GPUParallelDimAttr parDim); + + /// Get the known constant launch value for a dimension. + std::optional<uint64_t> getKnownConstantLaunchArg( + ::mlir::acc::GPUParallelDimAttr parDim); + + /// Add a new input argument, appending to both the operand list and + /// the region block arguments. Returns the new block argument. + ::mlir::BlockArgument appendInputArg(::mlir::Value); + + /// Check whether all parallel dimensions have width 1. + bool isEffectivelySerial(); + + /// Get the block argument representing the width for a given dimension. + ::mlir::BlockArgument parDimToWidth( + ::mlir::acc::GPUParallelDimAttr parDim); + + /// Get the block argument for a specific gpu::Processor. 
+ ::mlir::BlockArgument gpuParWidth(::mlir::gpu::Processor); + + /// Collect all GPU parallel dimensions present in the launch config. + llvm::SmallVector<::mlir::acc::GPUParallelDimAttr> getLaunchParDims(); + + /// Get the body block of the compute region. + ::mlir::Block *getBody() { return &getRegion().front(); } + + /// Get the terminator of the compute region. + ::mlir::Operation *getTerminator() { + return &getRegion().back().back(); + } + + /// Map a block argument back to its corresponding operand + /// ($launchArgs or $inputArgs). + ::mlir::Value getOperand(::mlir::BlockArgument blockArg); + }]; + + let hasVerifier = 1; + let hasCustomAssemblyFormat = 1; +} + #endif // OPENACC_CG_OPS diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 2bb1654cb636..33d3b84b32b9 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2845,7 +2845,7 @@ def OpenACC_LoopOp def OpenACC_YieldOp : OpenACC_Op<"yield", [Pure, ReturnLike, Terminator, ParentOneOf<["FirstprivateRecipeOp, LoopOp, ParallelOp, PrivateRecipeOp," "ReductionRecipeOp, ReductionInitOp, ReductionCombineRegionOp," - "SerialOp, AtomicUpdateOp"]>]> { + "SerialOp, AtomicUpdateOp, ComputeRegionOp"]>]> { let summary = "Acc yield and termination operation"; let description = [{ diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td index 117272693d62..bba385e69c0f 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td @@ -33,4 +33,12 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> { }]; } +def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> { + let summary = "parallel width token type"; + let description = [{ + Represents a type that is consumed by a compute region in order to + capture its parallelism dimensions 
arguments. + }]; +} + #endif // OPENACC_OPS_TYPES diff --git a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h index 852407292979..daf9b0df4191 100644 --- a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h +++ b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h @@ -81,7 +81,7 @@ int64_t linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis); /// /// Let `li = linearIndex`, assuming `strides` are `[s0, .. sn]`, return the /// vector of int64_t -/// `[li % s0, (li / s0) % s1, ..., (li / s0 / .. / sn-1) % sn]` +/// `[li / s0, (li % s0) / s1, ..., (li % s0 % .. % sn-1) / sn]` SmallVector<int64_t> delinearize(int64_t linearIndex, ArrayRef<int64_t> strides); @@ -181,7 +181,7 @@ AffineExpr linearize(MLIRContext *ctx, ArrayRef<AffineExpr> offsets, /// /// Let `li = linearIndex`, assuming `strides` are `[s0, .. sn]`, return the /// vector of AffineExpr -/// `[li % s0, (li / s0) % s1, ..., (li / s0 / .. / sn-1) % sn]` +/// `[li / s0, (li % s0) / s1, ..., (li % s0 % .. % sn-1) / sn]` /// /// It is the caller's responsibility to pass proper AffineExpr kind that result /// in valid AffineExpr (i.e. cannot multiply 2 AffineDimExpr or divide by an diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index 6f667f480167..a98073f3c5cf 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -254,6 +254,10 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { "xegpu::DistributeLayoutAttr", "collapseDims", (ins "SmallVector<int64_t>": $dimGroup)>, + InterfaceMethod<[{Derive a new layout by transposing it using `permutation`.}], + "xegpu::DistributeLayoutAttr", + "transposeDims", + (ins "ArrayRef<int64_t>": $permutation)>, InterfaceMethod<[{Generates instructions to compute multidimensional coordinates for dist units assigned to a level identified by linearId. 
The shape parameter represents the higher-level problem size. Each level may access @@ -261,56 +265,17 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { "FailureOr<SmallVector<SmallVector<Value>>>", "computeDistributedCoords", (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef<int64_t>":$shape)>, - InterfaceMethod</*desc=*/[{Check if this layout can be achieved by applying a transpose - to some other layout according to given permutation of (0...n-1).}], - /*retTy=*/"bool", - /*methodName=*/"isTransposeOf", - /*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other, "ArrayRef<int64_t>": $perm), - /*methodBody=*/[{ - if (!other) - return false; - if ($_self.getRank() != other.getRank() || perm.size() != static_cast<size_t>($_self.getRank())) - return false; - // Check if the permutation is valid - if (!isPermutationVector(perm)) - return false; - auto checkTranspose = [](ArrayRef<int64_t> dst, ArrayRef<int64_t> src, ArrayRef<int64_t> perm) { - // If both `dst` and `src` are empty, conservatively return true - // here because some layout fields can be empty. 
- if (dst.empty() && src.empty()) - return true; - for (const auto &ta : llvm::enumerate(perm)) { - if (src[ta.index()] != dst[ta.value()]) - return false; - } - return true; - }; - // Check sgLayout - if (!checkTranspose($_self.getEffectiveSgLayoutAsInt(), other.getEffectiveSgLayoutAsInt(), perm)) - return false; - // Check sgData - if (!checkTranspose($_self.getEffectiveSgDataAsInt(), other.getEffectiveSgDataAsInt(), perm)) - return false; - // Check instData - if (!checkTranspose($_self.getEffectiveInstDataAsInt(), other.getEffectiveInstDataAsInt(), perm)) - return false; - // Check laneLayout - if (!checkTranspose($_self.getEffectiveLaneLayoutAsInt(), other.getEffectiveLaneLayoutAsInt(), perm)) - return false; - // Check laneData - if (!checkTranspose($_self.getEffectiveLaneDataAsInt(), other.getEffectiveLaneDataAsInt(), perm)) - return false; - // Check order - if (!checkTranspose($_self.getEffectiveOrderAsInt(), other.getEffectiveOrderAsInt(), perm)) - return false; - - return true; - }]>, InterfaceMethod</*desc=*/[{Check if this layout is a slice of another layout.}], /*retTy=*/"bool", /*methodName=*/"isSliceOf", /*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other)>, - + InterfaceMethod</*desc=*/[{Check if this layout is a transpose of + the other layout according to given permutation of (0...n-1).}], + /*retTy=*/"bool", + /*methodName=*/"isTransposeOf", + /*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other, + "ArrayRef<int64_t>": $perm, + "xegpu::LayoutKind": $kind)>, InterfaceMethod</*desc=*/[{Check if this layout is compatible with another layout at a specific level of the layout hierarchy. 
Unlike isEqualTo, this compares only the effective (non-sliced) fields at the @@ -498,8 +463,11 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { // avoid every field of the attribute is nullptr, which may lead to segment fault if (!getInstData() && !getLaneLayout()) return nullptr; + // Only preserve order if lane_layout remains, since order requires + // sg_layout or lane_layout to be present. + auto order = getLaneLayout() ? getOrder() : nullptr; return LayoutAttr::get(getContext(), nullptr, nullptr, getInstData(), - getLaneLayout(), getLaneData(), getOrder()); + getLaneLayout(), getLaneData(), order); } LayoutAttr dropInstData() const{ @@ -567,6 +535,9 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { // that are collapsed into a single dimension in the derived layout. DistributeLayoutAttr collapseDims(SmallVector<int64_t> dimGroup); + // Derive a new layout by transposing the layout using `permutation`. + DistributeLayoutAttr transposeDims(ArrayRef<int64_t> permutation); + /// Delinearizes a linear ID into its multidimensional indices /// based on the effective level of the layout. FailureOr<SmallVector<Value>> @@ -584,6 +555,9 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { /// Check if this layout is equal to another layout. bool isEqualTo(const xegpu::DistributeLayoutAttr &other); + + /// Check if this layout is a transpose of another layout. + bool isTransposeOf(const xegpu::DistributeLayoutAttr &other, ArrayRef<int64_t> perm, const xegpu::LayoutKind kind); }]; let assemblyFormat = "`<` struct(params) `>`"; @@ -767,6 +741,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { // that are collapsed into a single dimension in the derived layout. DistributeLayoutAttr collapseDims(SmallVector<int64_t> dimGroup); + // Derive a new layout by transposing the layout using `permutation`. 
+ DistributeLayoutAttr transposeDims(ArrayRef<int64_t> permutation); + /// flatten a nested SliceAttr, e.g., for 2-level nested SliceAttr /// #xegpu.slice<#xegpu.slice<#xegpu.layout<sg_layout = [4, 8, 12]>, dims = [0]>, dims = [0]> /// it will coalese two slice operations and return a simplified SliceAttr @@ -792,6 +769,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Check if this layout is equal to another layout. bool isEqualTo(const xegpu::DistributeLayoutAttr &other); + /// Check if this layout is a transpose of another layout. + bool isTransposeOf(const xegpu::DistributeLayoutAttr &other, ArrayRef<int64_t> perm, const xegpu::LayoutKind kind); + /// Drop the slice dims to get the original layout. SliceAttr dropSliceDims(ArrayRef<int64_t> sliceDimsToDrop); }]; diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h index 3482d1b9401b..2ae0ef3ae852 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h +++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h @@ -81,6 +81,11 @@ DistributeLayoutAttr inferMultiReductionSourceLayout(DistributeLayoutAttr resLayout, SmallVector<int64_t> reduceDims); +/// Infers the source layout attribute for a transpose operation given the +/// result layout attribute and permutation. +DistributeLayoutAttr inferTransposeSourceLayout(DistributeLayoutAttr resLayout, + ArrayRef<int64_t> permutation); + /// Infers the source layout attribute for a bitcast operation given the /// result layout attribute, result element type bitwidth, and source element /// type bitwidth. 
diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index e0e1be35e4e1..e7f561e8a4d6 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -311,32 +311,13 @@ LogicalResult IndexCastOpLowering<OpTy, ExtCastTy>::matchAndRewrite( if constexpr (std::is_same_v<ExtCastTy, LLVM::ZExtOp>) isNonNeg = op.getNonNeg(); - bool isExact = op.getExact(); - - // Map exact to the appropriate overflow flag(s) for truncation: - // index_cast (signed) exact -> trunc nsw - // index_castui (unsigned) exact -> trunc nuw - // index_castui nneg exact -> trunc nuw nsw - LLVM::IntegerOverflowFlags truncOverflow = LLVM::IntegerOverflowFlags::none; - if (isExact) { - if constexpr (std::is_same_v<ExtCastTy, LLVM::SExtOp>) { - truncOverflow = LLVM::IntegerOverflowFlags::nsw; - } else { - truncOverflow = LLVM::IntegerOverflowFlags::nuw; - if (isNonNeg) - truncOverflow |= LLVM::IntegerOverflowFlags::nsw; - } - } - // Handle the scalar and 1D vector cases. 
Type operandType = adaptor.getIn().getType(); if (!isa<LLVM::LLVMArrayType>(operandType)) { Type targetType = this->typeConverter->convertType(resultType); if (targetBits < sourceBits) { - auto truncOp = rewriter.replaceOpWithNewOp<LLVM::TruncOp>( - op, targetType, adaptor.getIn()); - if (isExact) - truncOp.setOverflowFlags(truncOverflow); + rewriter.replaceOpWithNewOp<LLVM::TruncOp>(op, targetType, + adaptor.getIn()); } else { auto extOp = rewriter.replaceOpWithNewOp<ExtCastTy>(op, targetType, adaptor.getIn()); @@ -354,16 +335,15 @@ LogicalResult IndexCastOpLowering<OpTy, ExtCastTy>::matchAndRewrite( [&](Type llvm1DVectorTy, ValueRange operands) -> Value { typename OpTy::Adaptor adaptor(operands); if (targetBits < sourceBits) { - auto truncOp = LLVM::TruncOp::create(rewriter, op.getLoc(), - llvm1DVectorTy, adaptor.getIn()); - if (isExact) - truncOp.setOverflowFlags(truncOverflow); - return truncOp; + return LLVM::TruncOp::create(rewriter, op.getLoc(), llvm1DVectorTy, + adaptor.getIn()); } auto extOp = ExtCastTy::create(rewriter, op.getLoc(), llvm1DVectorTy, adaptor.getIn()); - if constexpr (std::is_same_v<ExtCastTy, LLVM::ZExtOp>) - extOp.setNonNeg(isNonNeg); + if constexpr (std::is_same_v<ExtCastTy, LLVM::ZExtOp>) { + if (isNonNeg) + extOp.setNonNeg(true); + } return extOp; }, rewriter); diff --git a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td index f26af4816ce8..fb9c16db9143 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td +++ b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td @@ -288,38 +288,31 @@ def SelectI1ToNot : // IndexCastOp //===----------------------------------------------------------------------===// -// index_cast(index_cast(x, exact)) -> x, if dstType == srcType. -// The inner exact guarantees the iN -> index conversion is lossless, -// so the roundtrip through index preserves the value. +// index_cast(index_cast(x)) -> x, if dstType == srcType. 
def IndexCastOfIndexCast : - Pat<(Arith_IndexCastOp:$res (Arith_IndexCastOp $x, $exact1), $exact2), + Pat<(Arith_IndexCastOp:$res (Arith_IndexCastOp $x)), (replaceWithValue $x), - [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x), - (Constraint<CPred<"(bool)$0">> $exact1)]>; + [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x)]>; // index_cast(extsi(x)) -> index_cast(x) def IndexCastOfExtSI : - Pat<(Arith_IndexCastOp (Arith_ExtSIOp $x), $exact), - (Arith_IndexCastOp $x, $exact)>; + Pat<(Arith_IndexCastOp (Arith_ExtSIOp $x)), (Arith_IndexCastOp $x)>; //===----------------------------------------------------------------------===// // IndexCastUIOp //===----------------------------------------------------------------------===// -// index_castui(index_castui(x, exact)) -> x, if dstType == srcType. -// The inner exact guarantees the iN -> index conversion is lossless, -// so the roundtrip through index preserves the value. +// index_castui(index_castui(x)) -> x, if dstType == srcType. 
def IndexCastUIOfIndexCastUI : - Pat<(Arith_IndexCastUIOp:$res - (Arith_IndexCastUIOp $x, $nneg1, $exact1), $nneg2, $exact2), + Pat<(Arith_IndexCastUIOp:$res (Arith_IndexCastUIOp $x, $nneg1), $nneg2), (replaceWithValue $x), - [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x), - (Constraint<CPred<"static_cast<bool>($0)">> $exact1)]>; + [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x)]>; // index_castui(extui(x)) -> index_castui(x) def IndexCastUIOfExtUI : - Pat<(Arith_IndexCastUIOp (Arith_ExtUIOp $x, $nneg1), $nneg2, $exact), - (Arith_IndexCastUIOp $x, $nneg1, $exact)>; + Pat<(Arith_IndexCastUIOp (Arith_ExtUIOp $x, $nneg1), $nneg2), + (Arith_IndexCastUIOp $x, $nneg1)>; + //===----------------------------------------------------------------------===// // BitcastOp diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp index 881234cdd619..85dba70dbde1 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Region.h" @@ -326,6 +327,241 @@ void ReductionCombineOp::getEffects( } //===----------------------------------------------------------------------===// +// ComputeRegionOp +//===----------------------------------------------------------------------===// + +static ParWidthOp getParWidthOpForLaunchArg(ComputeRegionOp op, + GPUParallelDimAttr parDim) { + for (auto launchArg : op.getLaunchArgs()) { + auto parOp = launchArg.getDefiningOp<ParWidthOp>(); + if (!parOp) + continue; + auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim()); + if (launchArgDim == parDim) + return parOp; + } + return nullptr; +} + +std::optional<Value> ComputeRegionOp::getLaunchArg(GPUParallelDimAttr parDim) { + if (auto parWidthOp 
= getParWidthOpForLaunchArg(*this, parDim)) + return parWidthOp.getResult(); + return {}; +} + +std::optional<Value> +ComputeRegionOp::getKnownLaunchArg(GPUParallelDimAttr parDim) { + if (auto parWidthOp = getParWidthOpForLaunchArg(*this, parDim)) + if (parWidthOp.getLaunchArg()) + return parWidthOp.getLaunchArg(); + return {}; +} + +std::optional<uint64_t> +ComputeRegionOp::getKnownConstantLaunchArg(GPUParallelDimAttr parDim) { + auto knownParWidth = getKnownLaunchArg(parDim); + if (knownParWidth.has_value()) + return getConstantIntValue(knownParWidth.value()); + return {}; +} + +BlockArgument ComputeRegionOp::appendInputArg(Value value) { + getInputArgsMutable().append(value); + return getBody()->addArgument(value.getType(), getLoc()); +} + +bool ComputeRegionOp::isEffectivelySerial() { + auto *ctx = getContext(); + + if (getLaunchArg(GPUParallelDimAttr::seqDim(ctx))) + return true; + + auto checkDim = [&](GPUParallelDimAttr dim) -> bool { + auto val = getKnownConstantLaunchArg(dim); + return val && *val == 1; + }; + + return checkDim(GPUParallelDimAttr::threadXDim(ctx)) && + checkDim(GPUParallelDimAttr::threadYDim(ctx)) && + checkDim(GPUParallelDimAttr::threadZDim(ctx)) && + checkDim(GPUParallelDimAttr::blockXDim(ctx)) && + checkDim(GPUParallelDimAttr::blockYDim(ctx)) && + checkDim(GPUParallelDimAttr::blockZDim(ctx)); +} + +BlockArgument ComputeRegionOp::parDimToWidth(GPUParallelDimAttr parDim) { + for (auto [pos, launchArg] : llvm::enumerate(getLaunchArgs())) { + auto parOp = launchArg.getDefiningOp<ParWidthOp>(); + assert(parOp); + auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim()); + if (launchArgDim == parDim) { + assert(pos < getRegion().front().getNumArguments() && + "launch arg position out of range"); + return getRegion().front().getArgument(pos); + } + } + llvm_unreachable("attempting to get unspecified parDim"); +} + +SmallVector<GPUParallelDimAttr> ComputeRegionOp::getLaunchParDims() { + SmallVector<GPUParallelDimAttr> parDims; + for 
(auto launchArg : getLaunchArgs()) { + auto parOp = launchArg.getDefiningOp<ParWidthOp>(); + auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim()); + int64_t dimInt = launchArgDim.getValue().getInt(); + parDims.push_back(intToParDim(getContext(), dimInt)); + } + return parDims; +} + +Value ComputeRegionOp::getOperand(BlockArgument blockArg) { + unsigned argNumber = blockArg.getArgNumber(); + unsigned numLaunchArgs = getLaunchArgs().size(); + unsigned numInputArgs = getInputArgs().size(); + assert(argNumber < (numLaunchArgs + numInputArgs) && + "invalid block argument"); + if (argNumber < numLaunchArgs) + return getLaunchArgs()[argNumber]; + return getInputArgs()[argNumber - numLaunchArgs]; +} + +BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) { + return parDimToWidth(GPUParallelDimAttr::get(getContext(), processor)); +} + +LogicalResult ComputeRegionOp::verify() { + unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size(); + unsigned actualBlockArgs = getRegion().front().getNumArguments(); + if (expectedBlockArgs != actualBlockArgs) + return emitOpError("expected ") + << expectedBlockArgs << " block arguments (launch + input), got " + << actualBlockArgs; + + return success(); +} + +void ComputeRegionOp::print(OpAsmPrinter &p) { + ValueRange regionArgs = getBody()->getArguments(); + ValueRange launchArgs = getLaunchArgs(); + ValueRange inputArgs = getInputArgs(); + + assert(regionArgs.size() == (launchArgs.size() + inputArgs.size()) && + "region args mismatch"); + + if (getStream()) + p << " stream(" << getStream() << " : " << getStream().getType() << ")"; + + size_t i = 0; + if (!launchArgs.empty()) { + p << " launch("; + for (size_t j = 0; j < launchArgs.size(); ++j, ++i) { + p << regionArgs[i] << " = " << launchArgs[j]; + if (j < launchArgs.size() - 1) + p << ", "; + } + p << ")"; + } + if (!inputArgs.empty()) { + p << " ins("; + for (size_t j = 0; j < inputArgs.size(); ++j, ++i) { + p << regionArgs[i] << " = " << 
inputArgs[j]; + if (j < inputArgs.size() - 1) + p << ", "; + } + p << ") : ("; + for (size_t j = 0; j < inputArgs.size(); ++j) { + p << inputArgs[j].getType(); + if (j < inputArgs.size() - 1) + p << ", "; + } + p << ")"; + } + p.printOptionalArrowTypeList(getResultTypes()); + p << " "; + p.printRegion(getRegion(), /*printEntryBlockArgs=*/false); + p.printOptionalAttrDict((*this)->getAttrs(), + /*elidedAttrs=*/getOperandSegmentSizeAttr()); +} + +ParseResult ComputeRegionOp::parse(OpAsmParser &parser, + OperationState &result) { + auto &builder = parser.getBuilder(); + + SmallVector<OpAsmParser::Argument> regionArgs; + OpAsmParser::UnresolvedOperand streamOperand; + Type streamType; + SmallVector<OpAsmParser::UnresolvedOperand> launchOperands; + SmallVector<OpAsmParser::UnresolvedOperand> inputOperands; + SmallVector<Type> types; + + bool hasStream = false; + if (succeeded(parser.parseOptionalKeyword("stream"))) { + hasStream = true; + if (parser.parseLParen() || parser.parseOperand(streamOperand) || + parser.parseColon() || parser.parseType(streamType) || + parser.parseRParen()) + return failure(); + } + + if (succeeded(parser.parseOptionalKeyword("launch"))) { + if (parser.parseAssignmentList(regionArgs, launchOperands)) + return failure(); + auto parWidthType = acc::ParWidthType::get(builder.getContext()); + for (size_t i = 0; i < regionArgs.size(); ++i) + types.push_back(parWidthType); + } + + if (succeeded(parser.parseOptionalKeyword("ins"))) { + if (parser.parseAssignmentList(regionArgs, inputOperands) || + parser.parseColon() || parser.parseLParen() || + parser.parseTypeList(types) || parser.parseRParen()) + return failure(); + } + + if (parser.parseOptionalArrowTypeList(result.types)) + return failure(); + + for (auto [iterArg, type] : llvm::zip_equal(regionArgs, types)) + iterArg.type = type; + + Region *body = result.addRegion(); + if (parser.parseRegion(*body, regionArgs)) + return failure(); + + const size_t numLaunchOperands = launchOperands.size(); + 
const size_t numInputOperands = inputOperands.size(); + assert(numLaunchOperands + numInputOperands == regionArgs.size() && + "compute region args mismatch"); + + result.addAttribute( + ComputeRegionOp::getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr({static_cast<int32_t>(numLaunchOperands), + static_cast<int32_t>(numInputOperands), + hasStream ? 1 : 0})); + + for (size_t i = 0; i < numLaunchOperands; ++i) { + if (parser.resolveOperand(launchOperands[i], types[i], result.operands)) + return failure(); + } + + for (size_t i = numLaunchOperands; i < regionArgs.size(); ++i) { + if (parser.resolveOperand(inputOperands[i - numLaunchOperands], types[i], + result.operands)) + return failure(); + } + + if (hasStream) { + if (parser.resolveOperand(streamOperand, streamType, result.operands)) + return failure(); + } + + if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// // GPUParallelDimAttr //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp b/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp index ea7ee715189e..7dbde227b2fa 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp @@ -135,7 +135,7 @@ static bool isRematerializationCandidate(Value val, // Trace through view-like operations to find the original value. 
Value origVal = getOriginalValue(val); Operation *definingOp = origVal.getDefiningOp(); - if (!definingOp) + if (!definingOp && !(definingOp = val.getDefiningOp())) return false; LLVM_DEBUG(llvm::dbgs() << "\tChecking candidate: " << *definingOp << "\n"); @@ -181,6 +181,20 @@ static bool isRematerializationCandidate(Value val, } } + // An op implementing both ViewLikeOpInterface and + // OutlineRematerializationOpInterface may have been traced through by + // getOriginalValue. If the traced op is not a candidate, check the direct + // defining op of the live-in value. + if (origVal != val) { + definingOp = val.getDefiningOp(); + if (definingOp && + isa<acc::OutlineRematerializationOpInterface>(definingOp)) { + LLVM_DEBUG(llvm::dbgs() + << "\t\t-> OutlineRematerializationOpInterface (direct)\n"); + return true; + } + } + LLVM_DEBUG(llvm::dbgs() << "\t\t-> not a candidate\n"); return false; } diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index c082600ec27d..4d412dd92e1b 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -618,24 +618,97 @@ DistributeLayoutAttr LayoutAttr::collapseDims(SmallVector<int64_t> dimGroup) { SmallVector<int32_t> laneLayout32(laneLayout.begin(), laneLayout.end()); SmallVector<int32_t> laneData32(laneData.begin(), laneData.end()); + auto toAttr = [&](ArrayRef<int32_t> v) -> DenseI32ArrayAttr { + return v.empty() ? nullptr : DenseI32ArrayAttr::get(getContext(), v); + }; + auto collapsedLayout = xegpu::LayoutAttr::get( - getContext(), - sgLayout32.empty() ? DenseI32ArrayAttr() - : DenseI32ArrayAttr::get(getContext(), sgLayout32), - sgData32.empty() ? DenseI32ArrayAttr() - : DenseI32ArrayAttr::get(getContext(), sgData32), - instData32.empty() ? DenseI32ArrayAttr() - : DenseI32ArrayAttr::get(getContext(), instData32), - laneLayout32.empty() ? 
DenseI32ArrayAttr() - : DenseI32ArrayAttr::get(getContext(), laneLayout32), - laneData32.empty() ? DenseI32ArrayAttr() - : DenseI32ArrayAttr::get(getContext(), laneData32), - collapsedOrder.empty() - ? DenseI32ArrayAttr() - : DenseI32ArrayAttr::get(getContext(), collapsedOrder)); + getContext(), toAttr(sgLayout32), toAttr(sgData32), toAttr(instData32), + toAttr(laneLayout32), toAttr(laneData32), toAttr(collapsedOrder)); return collapsedLayout; } +// Derive a new layout by transposing the layout using `permutation`. +DistributeLayoutAttr LayoutAttr::transposeDims(ArrayRef<int64_t> permutation) { + + SmallVector<int64_t> origSgLayout = getEffectiveSgLayoutAsInt(); + SmallVector<int64_t> origSgData = getEffectiveSgDataAsInt(); + SmallVector<int64_t> origInstData = getEffectiveInstDataAsInt(); + SmallVector<int64_t> origLaneLayout = getEffectiveLaneLayoutAsInt(); + SmallVector<int64_t> origLaneData = getEffectiveLaneDataAsInt(); + SmallVector<int64_t> origOrder = getEffectiveOrderAsInt(); + + SmallVector<int32_t> sgLayout; + SmallVector<int32_t> sgData; + SmallVector<int32_t> instData; + SmallVector<int32_t> laneLayout; + SmallVector<int32_t> laneData; + SmallVector<int32_t> order; + + for (int64_t idx : permutation) { + if (!origLaneLayout.empty()) { + laneLayout.push_back(static_cast<int32_t>(origLaneLayout[idx])); + laneData.push_back(static_cast<int32_t>(origLaneData[idx])); + } + if (!origInstData.empty()) + instData.push_back(static_cast<int32_t>(origInstData[idx])); + if (!origSgLayout.empty()) { + sgLayout.push_back(static_cast<int32_t>(origSgLayout[idx])); + sgData.push_back(static_cast<int32_t>(origSgData[idx])); + } + order.push_back(static_cast<int32_t>(origOrder[idx])); + } + if (origLaneLayout.empty() && origSgLayout.empty()) + order.clear(); + + auto toAttr = [&](ArrayRef<int32_t> v) -> DenseI32ArrayAttr { + return v.empty() ? 
nullptr : DenseI32ArrayAttr::get(getContext(), v); + }; + return xegpu::LayoutAttr::get(getContext(), toAttr(sgLayout), toAttr(sgData), + toAttr(instData), toAttr(laneLayout), + toAttr(laneData), toAttr(order)); +} + +/// Check if this layout is a transpose of another layout. +bool LayoutAttr::isTransposeOf(const xegpu::DistributeLayoutAttr &other, + ArrayRef<int64_t> perm, + const xegpu::LayoutKind kind) { + if (!other) + return false; + if (getRank() != other.getRank() || + perm.size() != static_cast<size_t>(getRank())) + return false; + if (!isPermutationVector(perm)) + return false; + auto checkTranspose = [](ArrayRef<int64_t> dst, ArrayRef<int64_t> src, + ArrayRef<int64_t> perm) { + for (const auto &ta : llvm::enumerate(perm)) { + if (src[ta.index()] != dst[ta.value()]) + return false; + } + return true; + }; + if (kind == xegpu::LayoutKind::Subgroup) + return checkTranspose(getEffectiveSgLayoutAsInt(), + other.getEffectiveSgLayoutAsInt(), perm) && + checkTranspose(getEffectiveSgDataAsInt(), + other.getEffectiveSgDataAsInt(), perm) && + checkTranspose(getEffectiveOrderAsInt(), + other.getEffectiveOrderAsInt(), perm); + if (kind == xegpu::LayoutKind::InstData) + return checkTranspose(getEffectiveInstDataAsInt(), + other.getEffectiveInstDataAsInt(), perm); + if (kind == xegpu::LayoutKind::Lane) + return checkTranspose(getEffectiveLaneLayoutAsInt(), + other.getEffectiveLaneLayoutAsInt(), perm) && + checkTranspose(getEffectiveLaneDataAsInt(), + other.getEffectiveLaneDataAsInt(), perm) && + checkTranspose(getEffectiveOrderAsInt(), + other.getEffectiveOrderAsInt(), perm); + + return false; +} + //===----------------------------------------------------------------------===// // XeGPU_SliceAttr //===----------------------------------------------------------------------===// @@ -881,6 +954,62 @@ DistributeLayoutAttr SliceAttr::collapseDims(SmallVector<int64_t> dimGroup) { DenseI64ArrayAttr::get(getContext(), sliceDims)); } +SmallVector<int64_t> 
getPermForParentLayout(ArrayRef<int64_t> sliceDims, + ArrayRef<int64_t> permutation) { + SmallVector<int64_t> sortedSliceDims = llvm::to_vector(sliceDims); + llvm::sort(sortedSliceDims); + + for (size_t i = 1; i < sortedSliceDims.size(); ++i) { + assert((sortedSliceDims[i] == sortedSliceDims[i - 1] + 1) && + "slice dims non consecutive, cannot be transposed"); + } + + SmallVector<int64_t> permForParent; + if (sortedSliceDims.front() == 0) { + // Example: sliceDims.size() = 2, permutation = {1, 0} + // result: {3, 2, 1, 0}. + for (int64_t dim : permutation) + permForParent.push_back(dim + sortedSliceDims.size()); + for (int64_t i = sortedSliceDims.size() - 1; i >= 0; --i) + permForParent.push_back(i); + } else { + // Example: sliceDims.size() = 2, permutation = {0, 1} + // result: {3, 2, 0, 1}. + for (int64_t i = sortedSliceDims.size() - 1; i >= 0; --i) + permForParent.push_back(i + permutation.size()); + for (int64_t dim : permutation) + permForParent.push_back(dim); + } + return permForParent; +} + +// Derive a new layout by transposing the layout using `permutation`. +DistributeLayoutAttr SliceAttr::transposeDims(ArrayRef<int64_t> permutation) { + SmallVector<int64_t> sliceDims = llvm::to_vector(getDims().asArrayRef()); + DistributeLayoutAttr parent = getParent(); + SmallVector<int64_t> permForParent = + getPermForParentLayout(sliceDims, permutation); + auto transposedParent = parent.transposeDims(permForParent); + return SliceAttr::get(getContext(), transposedParent, + DenseI64ArrayAttr::get(getContext(), sliceDims)); +} + +/// Check if this layout is a transpose of another layout. +bool SliceAttr::isTransposeOf(const xegpu::DistributeLayoutAttr &other, + ArrayRef<int64_t> perm, + const xegpu::LayoutKind kind) { + // other must be a SliceAttr with the same slice dims. + auto otherSlice = dyn_cast<xegpu::SliceAttr>(other); + if (!otherSlice || getDims() != otherSlice.getDims()) + return false; + // check whether the parent layouts are transposes of each other. 
+ SmallVector<int64_t> sliceDims = llvm::to_vector(getDims().asArrayRef()); + DistributeLayoutAttr parent = getParent(); + SmallVector<int64_t> permForParent = getPermForParentLayout(sliceDims, perm); + auto otherParent = otherSlice.getParent(); + return parent.isTransposeOf(otherParent, permForParent, kind); +} + //===----------------------------------------------------------------------===// // XeGPU_RangeAttr //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp index 7aa186bb2222..432886db29d2 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp @@ -178,6 +178,14 @@ xegpu::inferMultiReductionSourceLayout(xegpu::DistributeLayoutAttr resLayout, return sliceLayout.getParent(); } +/// Infers the source layout attribute for a transpose operation given the +/// result layout attribute and permutation. +xegpu::DistributeLayoutAttr +xegpu::inferTransposeSourceLayout(xegpu::DistributeLayoutAttr resLayout, + ArrayRef<int64_t> permutation) { + return resLayout.transposeDims(permutation); +} + /// Infers the source layout attribute for a bitcast operation given the /// result layout attribute, result element type bitwidth, and source element /// type bitwidth. @@ -1144,6 +1152,16 @@ xegpu::DistributeLayoutAttr xegpu::getConsumerLayoutAt(OpOperand &operand) { if (idx == 1) return resLayout; } + + // For vector::TransposeOp, infer source layout from result layout using + // permutation. + if (auto transpose = dyn_cast<vector::TransposeOp>(op)) { + if (!resLayout) + return xegpu::DistributeLayoutAttr(); + return xegpu::inferTransposeSourceLayout(resLayout, + transpose.getPermutation()); + } + // For elementwise operations, all operands must have the same layout as the // result. 
if (OpTrait::hasElementwiseMappableTraits(op) && op->getNumResults() == 1) { diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp index 7f7e8d6ad773..ab8f7e768ec1 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp @@ -912,9 +912,12 @@ void LayoutInfoPropagation::visitTransposeOp( LayoutInfo resultLayout = results[0]->getValue(); if (!resultLayout.isAssigned()) return; - LayoutInfo newLayout = resultLayout.transpose(transpose.getPermutation()); + auto consumerLayoutAttr = + dyn_cast<xegpu::DistributeLayoutAttr>(resultLayout.get()); + auto srcLayoutAttr = xegpu::inferTransposeSourceLayout( + consumerLayoutAttr, transpose.getPermutation()); // Propagate the new layout to the vector operand. - propagateIfChanged(operands[0], operands[0]->meet(newLayout)); + propagateIfChanged(operands[0], operands[0]->meet(LayoutInfo(srcLayoutAttr))); } /// For vector::BitCastOp, the lane_data of the source layout is changed based diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index bf9fded8a3ab..38bc95d39c2c 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -1963,7 +1963,8 @@ struct VectorTransposeDistribution final : public gpu::WarpDistributionPattern { "does not have 2D layout"); ArrayRef<int64_t> perm = transposeOp.getPermutation(); // Result layout must be a transpose of source layout. 
- if (!resultLayout.isTransposeOf(sourceLayout, perm)) + if (!resultLayout.isTransposeOf(sourceLayout, perm, + xegpu::LayoutKind::Lane)) return rewriter.notifyMatchFailure( transposeOp, "the source or result vector layouts must be 2D transposes of each " diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 30e4a956a0ad..139a30e76854 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -1532,7 +1532,8 @@ struct WgToSgVectorTransposeOp // Check that sgLayout, sgData & order are properly transposed for source // and result - if (!layout.isTransposeOf(sourceLayout, permutation)) + if (!layout.isTransposeOf(sourceLayout, permutation, + xegpu::LayoutKind::Subgroup)) return rewriter.notifyMatchFailure( op, "Result layout is not a valid transpose of source layout " "according to permutation"); @@ -1540,13 +1541,13 @@ struct WgToSgVectorTransposeOp SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first; VectorType newResultType = VectorType::get(sgShape, resultType.getElementType()); + SmallVector<Value> newTransposeOps; for (auto src : adaptor.getVector()) { auto newTranspose = vector::TransposeOp::create( rewriter, op.getLoc(), newResultType, src, permutation); newTransposeOps.push_back(newTranspose.getResult()); } - rewriter.replaceOpWithMultiple(op, {newTransposeOps}); return success(); } diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index 2845df23293d..47069906fa11 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -160,58 +160,6 @@ func.func @index_castui_nneg_not_set(%arg0: i1) { // ----- -// index_cast exact on truncation lowers to trunc nsw (signed semantics). 
-// CHECK-LABEL: @index_cast_exact_trunc -func.func @index_cast_exact_trunc(%arg0: index) { -// CHECK: llvm.trunc %{{.*}} overflow<nsw> : i{{.*}} to i1 - %0 = arith.index_cast %arg0 exact : index to i1 - return -} - -// ----- - -// index_cast exact on widening: exact is vacuously true, sext has no flag. -// CHECK-LABEL: @index_cast_exact_ext -func.func @index_cast_exact_ext(%arg0: i1) { -// CHECK: llvm.sext %{{.*}} : i1 to i{{.*}} -// CHECK-NOT: nsw - %0 = arith.index_cast %arg0 exact : i1 to index - return -} - -// ----- - -// index_castui exact on truncation lowers to trunc nuw (unsigned semantics). -// CHECK-LABEL: @index_castui_exact_trunc -func.func @index_castui_exact_trunc(%arg0: index) { -// CHECK: llvm.trunc %{{.*}} overflow<nuw> : i{{.*}} to i1 - %0 = arith.index_castui %arg0 exact : index to i1 - return -} - -// ----- - -// index_castui nneg exact on truncation lowers to trunc nuw nsw. -// CHECK-LABEL: @index_castui_nneg_exact_trunc -func.func @index_castui_nneg_exact_trunc(%arg0: index) { -// CHECK: llvm.trunc %{{.*}} overflow<nsw, nuw> : i{{.*}} to i1 - %0 = arith.index_castui %arg0 nneg exact : index to i1 - return -} - -// ----- - -// index_castui exact on widening: exact is vacuously true, zext has no flag. -// CHECK-LABEL: @index_castui_exact_ext -func.func @index_castui_exact_ext(%arg0: i1) { -// CHECK: llvm.zext %{{.*}} : i1 to i{{.*}} -// CHECK-NOT: nuw - %0 = arith.index_castui %arg0 exact : i1 to index - return -} - -// ----- - // Checking conversion of signed integer types to floating point. 
// CHECK-LABEL: @sitofp func.func @sitofp(%arg0 : i32, %arg1 : i64) { diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 326afcae696c..035c10e78bf9 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -588,15 +588,6 @@ func.func @indexCastOfSignExtend(%arg0: i8) -> index { return %idx : index } -// CHECK-LABEL: @indexCastOfSignExtend_exact -// CHECK: %[[res:.+]] = arith.index_cast %arg0 exact : i8 to index -// CHECK: return %[[res]] -func.func @indexCastOfSignExtend_exact(%arg0: i8) -> index { - %ext = arith.extsi %arg0 : i8 to i16 - %idx = arith.index_cast %ext exact : i16 to index - return %idx : index -} - // CHECK-LABEL: @indexCastUIOfUnsignedExtend // CHECK: %[[res:.+]] = arith.index_castui %arg0 : i8 to index // CHECK: return %[[res]] @@ -625,61 +616,6 @@ func.func @indexCastUIOfUnsignedExtend_nneg_on_castui(%arg0: i8) -> index { return %idx : index } -// CHECK-LABEL: @indexCastUIOfUnsignedExtend_exact -// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact : i8 to index -// CHECK: return %[[res]] -func.func @indexCastUIOfUnsignedExtend_exact(%arg0: i8) -> index { - %ext = arith.extui %arg0 : i8 to i16 - %idx = arith.index_castui %ext exact : i16 to index - return %idx : index -} - -// CHECK-LABEL: @indexCastUIOfUnsignedExtend_nneg_exact -// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact nneg : i8 to index -// CHECK: return %[[res]] -func.func @indexCastUIOfUnsignedExtend_nneg_exact(%arg0: i8) -> index { - %ext = arith.extui %arg0 nneg : i8 to i16 - %idx = arith.index_castui %ext exact : i16 to index - return %idx : index -} - -// index_castui(index_castui(x)) -> x only when exact is on the inner cast. 
-// CHECK-LABEL: @indexCastUIOfIndexCastUI_no_exact -// CHECK: arith.index_castui -// CHECK: arith.index_castui -func.func @indexCastUIOfIndexCastUI_no_exact(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 : i32 to index - %res = arith.index_castui %idx : index to i32 - return %res : i32 -} - -// CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_inner -// CHECK: return %arg0 : i32 -func.func @indexCastUIOfIndexCastUI_exact_inner(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 exact : i32 to index - %res = arith.index_castui %idx : index to i32 - return %res : i32 -} - -// exact on outer only does NOT trigger the fold (outer exact on widening -// is vacuously true and does not guarantee the inner truncation is lossless). -// CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_outer -// CHECK: arith.index_castui -// CHECK: arith.index_castui -func.func @indexCastUIOfIndexCastUI_exact_outer(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 : i32 to index - %res = arith.index_castui %idx exact : index to i32 - return %res : i32 -} - -// CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_both -// CHECK: return %arg0 : i32 -func.func @indexCastUIOfIndexCastUI_exact_both(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 exact : i32 to index - %res = arith.index_castui %idx exact : index to i32 - return %res : i32 -} - // CHECK-LABEL: @indexCastFold // CHECK: %[[res:.*]] = arith.constant -2 : index // CHECK: return %[[res]] diff --git a/mlir/test/Dialect/Arith/ops.mlir b/mlir/test/Dialect/Arith/ops.mlir index a9eabe97ebfc..9765db69d6dd 100644 --- a/mlir/test/Dialect/Arith/ops.mlir +++ b/mlir/test/Dialect/Arith/ops.mlir @@ -909,20 +909,6 @@ func.func @test_index_cast_scalable_vector1(%arg0 : vector<[8]xindex>) -> vector return %0 : vector<[8]xi64> } -// CHECK-LABEL: test_index_cast_exact -// CHECK: arith.index_cast %{{.*}} exact : i32 to index -func.func @test_index_cast_exact(%arg0 : i32) -> index { - %0 = arith.index_cast %arg0 exact : i32 to index - return %0 : 
index -} - -// CHECK-LABEL: test_index_cast_exact_vector -// CHECK: arith.index_cast %{{.*}} exact : vector<8xi32> to vector<8xindex> -func.func @test_index_cast_exact_vector(%arg0 : vector<8xi32>) -> vector<8xindex> { - %0 = arith.index_cast %arg0 exact : vector<8xi32> to vector<8xindex> - return %0 : vector<8xindex> -} - // CHECK-LABEL: test_index_castui0 func.func @test_index_castui0(%arg0 : i32) -> index { %0 = arith.index_castui %arg0 : i32 to index @@ -985,20 +971,6 @@ func.func @test_index_castui_nneg_vector(%arg0 : vector<8xi32>) -> vector<8xinde return %0 : vector<8xindex> } -// CHECK-LABEL: test_index_castui_exact -// CHECK: arith.index_castui %{{.*}} exact : i32 to index -func.func @test_index_castui_exact(%arg0 : i32) -> index { - %0 = arith.index_castui %arg0 exact : i32 to index - return %0 : index -} - -// CHECK-LABEL: test_index_castui_nneg_exact -// CHECK: arith.index_castui %{{.*}} exact nneg : i32 to index -func.func @test_index_castui_nneg_exact(%arg0 : i32) -> index { - %0 = arith.index_castui %arg0 nneg exact : i32 to index - return %0 : index -} - // CHECK-LABEL: test_bitcast0 func.func @test_bitcast0(%arg0 : i64) -> f64 { %0 = arith.bitcast %arg0 : i64 to f64 diff --git a/mlir/test/Dialect/OpenACC/invalid-cg.mlir b/mlir/test/Dialect/OpenACC/invalid-cg.mlir index bc2408ceafe8..f788e6c03bcc 100644 --- a/mlir/test/Dialect/OpenACC/invalid-cg.mlir +++ b/mlir/test/Dialect/OpenACC/invalid-cg.mlir @@ -19,3 +19,23 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) { scf.reduce // expected-error@+1 {{expected one of ::mlir::gpu::Processor enum names}} } {acc.par_dims = #acc<par_dims[gang]>} + +// ----- + +// expected-note@+1 {{prior use here}} +%c32 = arith.constant 32 : index +// expected-error@+1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}} +acc.compute_region launch(%arg0 = %c32) { + acc.yield +} {origin = "acc.parallel"} + +// ----- + +// Use generic form to introduce an extra block argument. 
+%c64 = arith.constant 64 : index +%w = acc.par_width %c64 {par_dim = #acc.par_dim<thread_x>} +// expected-error@+1 {{'acc.compute_region' op expected 1 block arguments (launch + input), got 2}} +"acc.compute_region"(%w) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({ +^bb0(%arg0: index, %extra: index): + "acc.yield"() : () -> () +}) {origin = "acc.parallel"} : (!acc.par_width) -> () diff --git a/mlir/test/Dialect/OpenACC/ops-cg.mlir b/mlir/test/Dialect/OpenACC/ops-cg.mlir index e6453da21ed7..7a61261d97ba 100644 --- a/mlir/test/Dialect/OpenACC/ops-cg.mlir +++ b/mlir/test/Dialect/OpenACC/ops-cg.mlir @@ -77,3 +77,197 @@ func.func @par_dims_2d_grid() { return } // CHECK: acc.par_dims = #acc<par_dims[block_y, thread_y]> + +// ----- + +// CHECK-LABEL: func @compute_region_single_dim +func.func @compute_region_single_dim(%data: memref<1024xf32>, + %result: memref<f32>) { + %c128 = arith.constant 128 : index + %copyin = acc.copyin varPtr(%data : memref<1024xf32>) -> memref<1024xf32> + %copy = acc.copyin varPtr(%result : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copy>} + acc.kernel_environment dataOperands(%copyin, %copy : memref<1024xf32>, memref<f32>) { + %w0 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>} + acc.compute_region launch(%arg0 = %w0) + ins(%arg1 = %copyin, %arg2 = %copy) : (memref<1024xf32>, memref<f32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c128_inner = arith.constant 128 : index + %cst = arith.constant 0.000000e+00 : f32 + memref.store %cst, %arg2[] : memref<f32> + scf.parallel (%iv) = (%c0) to (%c128_inner) step (%c1) { + %val = memref.load %arg1[%iv] : memref<1024xf32> + %cur = memref.load %arg2[] : memref<f32> + %sum = arith.addf %cur, %val : f32 + memref.store %sum, %arg2[] : memref<f32> + scf.reduce + } {acc.par_dims = #acc<par_dims[thread_x]>} + acc.yield + } {origin = "acc.parallel"} + } + acc.copyout accPtr(%copy : memref<f32>) to varPtr(%result : memref<f32>) {dataClause = 
#acc<data_clause acc_copy>} + acc.delete accPtr(%copyin : memref<1024xf32>) + return +} +// CHECK: %[[W:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>} +// CHECK: acc.compute_region launch(%{{.*}} = %[[W]]) ins({{.*}}) : (memref<1024xf32>, memref<f32>) { +// CHECK: acc.yield +// CHECK: } {origin = "acc.parallel"} + +// ----- + +// CHECK-LABEL: func @compute_region_two_dims +func.func @compute_region_two_dims(%data: memref<8xi32>, + %reduction_var: memref<i32>) { + %c8 = arith.constant 8 : index + %c128 = arith.constant 128 : index + %copyin_data = acc.copyin varPtr(%data : memref<8xi32>) -> memref<8xi32> + %copyin_red = acc.copyin varPtr(%reduction_var : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_reduction>} + acc.kernel_environment dataOperands(%copyin_data, %copyin_red : memref<8xi32>, memref<i32>) { + %w0 = acc.par_width %c8 {par_dim = #acc.par_dim<block_x>} + %w1 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>} + acc.compute_region launch(%arg0 = %w0, %arg1 = %w1) + ins(%arg2 = %copyin_data, %arg3 = %copyin_red) : (memref<8xi32>, memref<i32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8_inner = arith.constant 8 : index + %c0_i32 = arith.constant 0 : i32 + %init = acc.reduction_init %arg3 <add> : memref<i32> { + %alloca = memref.alloca() : memref<i32> + memref.store %c0_i32, %alloca[] : memref<i32> + acc.yield %alloca : memref<i32> + } + scf.parallel (%iv) = (%c0) to (%c8_inner) step (%c1) { + %v = memref.load %arg2[%iv] : memref<8xi32> + %cur = memref.load %init[] : memref<i32> + %sum = arith.addi %cur, %v : i32 + memref.store %sum, %init[] : memref<i32> + scf.reduce + } {acc.par_dims = #acc<par_dims[block_x, thread_x]>} + acc.reduction_combine %init into %arg3 <add> : memref<i32> + acc.yield + } {origin = "acc.parallel"} + } + acc.copyout accPtr(%copyin_red : memref<i32>) to varPtr(%reduction_var : memref<i32>) {dataClause = #acc<data_clause acc_reduction>} + acc.delete 
accPtr(%copyin_data : memref<8xi32>) + return +} +// CHECK: %[[W0:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<block_x>} +// CHECK: %[[W1:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>} +// CHECK: acc.compute_region launch(%{{.*}} = %[[W0]], %{{.*}} = %[[W1]]) ins({{.*}}) : (memref<8xi32>, memref<i32>) { +// CHECK: acc.yield +// CHECK: } {origin = "acc.parallel"} + +// ----- + +// CHECK-LABEL: func @compute_region_unknown_width +func.func @compute_region_unknown_width(%data: memref<100xf32>) { + %copyin = acc.copyin varPtr(%data : memref<100xf32>) -> memref<100xf32> + acc.kernel_environment dataOperands(%copyin : memref<100xf32>) { + %w0 = acc.par_width {par_dim = #acc.par_dim<thread_x>} + acc.compute_region launch(%arg0 = %w0) + ins(%arg1 = %copyin) : (memref<100xf32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c100 = arith.constant 100 : index + scf.parallel (%iv) = (%c0) to (%c100) step (%c1) { + scf.reduce + } {acc.par_dims = #acc<par_dims[thread_x]>} + acc.yield + } {origin = "acc.kernels"} + } + acc.delete accPtr(%copyin : memref<100xf32>) + return +} +// CHECK: %[[W:.*]] = acc.par_width {par_dim = #acc.par_dim<thread_x>} +// CHECK: acc.compute_region launch(%{{.*}} = %[[W]]) ins({{.*}}) : (memref<100xf32>) { +// CHECK: acc.yield +// CHECK: } {origin = "acc.kernels"} + +// ----- + +// CHECK-LABEL: func @compute_region_no_launch +func.func @compute_region_no_launch(%a: memref<i32>, %b: memref<i32>) { + %copy_a = acc.copyin varPtr(%a : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_copy>} + %copy_b = acc.copyin varPtr(%b : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_copy>} + acc.kernel_environment dataOperands(%copy_a, %copy_b : memref<i32>, memref<i32>) { + acc.compute_region + ins(%arg0 = %copy_a, %arg1 = %copy_b) : (memref<i32>, memref<i32>) { + %c1 = arith.constant 1 : i32 + memref.store %c1, %arg0[] : memref<i32> + memref.store %c1, %arg1[] : memref<i32> + acc.yield + } 
{origin = "acc.serial"} + } + acc.copyout accPtr(%copy_a : memref<i32>) to varPtr(%a : memref<i32>) {dataClause = #acc<data_clause acc_copy>} + acc.copyout accPtr(%copy_b : memref<i32>) to varPtr(%b : memref<i32>) {dataClause = #acc<data_clause acc_copy>} + return +} +// CHECK: acc.compute_region ins({{.*}}) : (memref<i32>, memref<i32>) { +// CHECK: acc.yield +// CHECK: } {origin = "acc.serial"} + +// ----- + +// CHECK-LABEL: func @compute_region_launch_only +func.func @compute_region_launch_only() { + %c32 = arith.constant 32 : index + %w0 = acc.par_width %c32 {par_dim = #acc.par_dim<thread_x>} + acc.compute_region launch(%arg0 = %w0) { + acc.yield + } {origin = "acc.parallel"} + return +} +// CHECK: %[[W:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>} +// CHECK: acc.compute_region launch(%{{.*}} = %[[W]]) { +// CHECK: acc.yield +// CHECK: } {origin = "acc.parallel"} + +// ----- + +// CHECK-LABEL: func @compute_region_all_fields +// CHECK-SAME: (%{{.*}}: memref<1024xf32>, %[[STREAM:.*]]: !gpu.async.token) +func.func @compute_region_all_fields(%data: memref<1024xf32>, + %stream: !gpu.async.token) { + %c128 = arith.constant 128 : index + %c8 = arith.constant 8 : index + %copyin = acc.copyin varPtr(%data : memref<1024xf32>) -> memref<1024xf32> + acc.kernel_environment dataOperands(%copyin : memref<1024xf32>) { + %w0 = acc.par_width %c8 {par_dim = #acc.par_dim<block_x>} + %w1 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>} + acc.compute_region stream(%stream : !gpu.async.token) + launch(%arg0 = %w0, %arg1 = %w1) + ins(%arg2 = %copyin) : (memref<1024xf32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c1024 = arith.constant 1024 : index + scf.parallel (%iv) = (%c0) to (%c1024) step (%c1) { + scf.reduce + } {acc.par_dims = #acc<par_dims[block_x, thread_x]>} + acc.yield + } {kernel_func_name = @compute_kernel, kernel_module_name = @device_module, origin = "acc.parallel"} + } + acc.delete accPtr(%copyin : memref<1024xf32>) 
+ return +} +// CHECK: %[[W0:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<block_x>} +// CHECK: %[[W1:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>} +// CHECK: acc.compute_region stream(%[[STREAM]] : !gpu.async.token) launch(%{{.*}} = %[[W0]], %{{.*}} = %[[W1]]) ins({{.*}}) : (memref<1024xf32>) { +// CHECK: acc.yield +// CHECK: } {kernel_func_name = @compute_kernel, kernel_module_name = @device_module, origin = "acc.parallel"} + +// ----- + +// CHECK-LABEL: func @compute_region_with_results +func.func @compute_region_with_results() -> i32 { + %w0 = acc.par_width {par_dim = #acc.par_dim<thread_x>} + %0 = acc.compute_region launch(%arg0 = %w0) -> i32 { + %c0_i32 = arith.constant 0 : i32 + acc.yield %c0_i32 : i32 + } {origin = "acc.parallel"} + return %0 : i32 +} +// CHECK: %[[W:.*]] = acc.par_width {par_dim = #acc.par_dim<thread_x>} +// CHECK: {{.*}} = acc.compute_region launch(%{{.*}} = %[[W]]) -> i32 { +// CHECK: acc.yield +// CHECK: } {origin = "acc.parallel"} diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir index c073045691f5..ffbe95b2a6f8 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir @@ -58,12 +58,12 @@ gpu.module @test { gpu.func @vector_transpose(%src: memref<256x128xf32>, %src1: memref<128x256xf32>) kernel attributes {known_block_size = array<i32: 1, 32, 16>} { // CHECK: %[[TDESC_LD:.*]] = xegpu.create_nd_tdesc %[[ARG_0]] : memref<256x128xf32> -> - // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>> + // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], order = [0, 1]>> // CHECK: %[[TDESC_ST:.*]] = xegpu.create_nd_tdesc %[[ARG_1]] : memref<128x256xf32> -> // CHECK-SAME: !xegpu.tensor_desc<128x256xf32, #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>> - // CHECK: %[[LOAD:.*]] = 
xegpu.load_nd %[[TDESC_LD]][0, 0] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}> : - // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>> -> vector<256x128xf32> + // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC_LD]][0, 0] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], order = [0, 1]>}> : + // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], order = [0, 1]>> -> vector<256x128xf32> // CHECK: %[[TRANSPOSED:.*]] = vector.transpose %2, [1, 0] // CHECK-SAME {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>} : vector<256x128xf32> to vector<128x256xf32> diff --git a/mlir/test/Dialect/XeGPU/propagate-layout.mlir b/mlir/test/Dialect/XeGPU/propagate-layout.mlir index 4f2349a89b1e..3253d0004caf 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout.mlir @@ -278,9 +278,9 @@ func.func @vector_bitcast_i16_to_f16(%arg0: memref<8x16xi16>, %arg1: memref<16x1 // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_bitcast_i32_to_f16( -// CHECK: %[[LOAD:.*]] = xegpu.load_nd %{{.*}} <{layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}> -// CHECK-SAME: !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> -> vector<16x8xi32> -// CHECK-NEXT: %{{.*}} = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>} +// CHECK: %[[LOAD:.*]] = xegpu.load_nd %{{.*}} <{layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>}> +// CHECK-SAME: !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>> -> vector<16x8xi32> +// CHECK-NEXT: %{{.*}} = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2], order = [0, 1]>} // CHECK-SAME: vector<16x8xi32> to vector<16x16xf16> func.func 
@vector_bitcast_i32_to_f16(%arg0: memref<8x16xf16>, %arg1: memref<16x8xi32>, %arg2: memref<8x16xf32>) { %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir b/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir index 5ed2148a7258..d5f2a2358f1d 100644 --- a/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir +++ b/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir @@ -9,6 +9,11 @@ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s +// SPIR-V backend generates incorrect printf ops after +// https://github.com/llvm/llvm-project/pull/178980 changed the way variadic arguments. +// are handled. Test is expected to fail until the issue is resolved. + +// XFAIL: * module @test attributes {gpu.container_module} { gpu.module @test_module { gpu.func @test_printf(%arg0: i32, %arg1: f32) kernel { diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 3930575c45b3..8e02c06a0a29 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -895,7 +895,7 @@ func.func @subview(%arg0 : index, %arg1 : index) -> (index, index) { // CHECK-LABEL: func @index_cast // CHECK-SAME: %[[ARG_0:arg[0-9]+]]: i16 func.func @index_cast(%arg0: i16) -> (i16) { - %11 = arith.index_cast %arg0 exact : i16 to index + %11 = arith.index_cast %arg0 : i16 to index %12 = arith.index_cast %11 : index to i16 // CHECK: return %[[ARG_0]] : i16 return %12 : i16 diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 87d1d88e971b..31a229d94a18 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -3916,6 +3916,17 @@ libc_support_library( ) libc_support_library( + name = "__support_math_ffmaf128", + hdrs = ["src/__support/math/ffmaf128.h"], + deps = [ + ":__support_common", + ":__support_fputil_fma", + ":__support_macros_config", + 
":llvm_libc_types_float128", + ], +) + +libc_support_library( name = "__support_math_floor", hdrs = ["src/__support/math/floor.h"], deps = [ @@ -6694,7 +6705,7 @@ libc_math_function( libc_math_function( name = "ffmaf128", additional_deps = [ - ":__support_fputil_fma", + ":__support_math_ffmaf128", ], ) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 9e20ce1cb3ee..a8455a61c95b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10366,6 +10366,7 @@ cc_library( ":BytecodeOpInterface", ":ControlFlowInterfaces", ":DataLayoutInterfaces", + ":DialectUtils", ":GPUDialect", ":IR", ":LLVMDialect", |
