aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVitaly Buka <vitalybuka@google.com>2026-03-05 17:18:24 -0800
committerVitaly Buka <vitalybuka@google.com>2026-03-05 17:18:24 -0800
commit2d67bf9405268cb08d70b7724e9c01d79a7dc839 (patch)
tree87709c1d59b9c8440165b384ca460bcf0c78dda1
parent68708c61f8cb9028614772c22141a6312fe1aee7 (diff)
parentcf8004bdee91cdbf6473a2c2d4dd992e5d5f8a92 (diff)
downloadllvm-users/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test.tar.gz
llvm-users/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test.tar.bz2
llvm-users/vitalybuka/spr/libcstring-add-constexpr-initialization-stress-test.zip
Created using spr 1.3.7
-rw-r--r--clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt2
-rw-r--r--clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp5
-rw-r--r--clang-tools-extra/clang-tidy/portability/CMakeLists.txt1
-rw-r--r--clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.cpp (renamed from clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp)4
-rw-r--r--clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.h (renamed from clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h)12
-rw-r--r--clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp2
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst5
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst10
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/list.rst3
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst12
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/portability/no-assembler.cpp (renamed from clang-tools-extra/test/clang-tidy/checkers/hicpp/no-assembler.cpp)8
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp22
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp14
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp11
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp16
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp14
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp9
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp11
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp8
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp12
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp16
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp11
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp13
-rw-r--r--clang/include/clang/AST/ASTNodeTraverser.h4
-rw-r--r--clang/include/clang/AST/RecursiveASTVisitor.h8
-rw-r--r--clang/include/clang/AST/StmtSYCL.h86
-rw-r--r--clang/include/clang/Basic/AttrDocs.td232
-rw-r--r--clang/include/clang/Basic/Builtins.td12
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td28
-rw-r--r--clang/include/clang/Basic/StmtNodes.td1
-rw-r--r--clang/include/clang/CIR/MissingFeatures.h2
-rw-r--r--clang/include/clang/Frontend/CompilerInstance.h16
-rw-r--r--clang/include/clang/Sema/ScopeInfo.h4
-rw-r--r--clang/include/clang/Sema/Sema.h25
-rw-r--r--clang/include/clang/Sema/SemaSYCL.h31
-rw-r--r--clang/include/clang/Serialization/ASTBitCodes.h3
-rw-r--r--clang/lib/AST/StmtPrinter.cpp7
-rw-r--r--clang/lib/AST/StmtProfile.cpp5
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp840
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCall.cpp10
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCall.h6
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCleanup.cpp23
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp304
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.h10
-rw-r--r--clang/lib/CIR/CodeGen/EHScopeStack.h19
-rw-r--r--clang/lib/CodeGen/CGHLSLBuiltins.cpp48
-rw-r--r--clang/lib/CodeGen/CGHLSLRuntime.h2
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp18
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h2
-rw-r--r--clang/lib/CodeGen/CodeGenSYCL.cpp13
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp5
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/ARM.cpp18
-rw-r--r--clang/lib/Driver/Driver.cpp2
-rw-r--r--clang/lib/Frontend/CompilerInstance.cpp13
-rw-r--r--clang/lib/Frontend/FrontendActions.cpp4
-rw-r--r--clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp147
-rw-r--r--clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h4
-rw-r--r--clang/lib/Sema/HLSLExternalSemaSource.cpp101
-rw-r--r--clang/lib/Sema/SemaDecl.cpp67
-rw-r--r--clang/lib/Sema/SemaExceptionSpec.cpp14
-rw-r--r--clang/lib/Sema/SemaExpr.cpp3
-rw-r--r--clang/lib/Sema/SemaHLSL.cpp142
-rw-r--r--clang/lib/Sema/SemaSYCL.cpp306
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiate.cpp50
-rw-r--r--clang/lib/Sema/TreeTransform.h25
-rw-r--r--clang/lib/Serialization/ASTReaderStmt.cpp13
-rw-r--r--clang/lib/Serialization/ASTWriterStmt.cpp11
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngine.cpp1
-rw-r--r--clang/test/AST/HLSL/Texture2D-scalar-AST.hlsl (renamed from clang/test/AST/HLSL/Texture2D-AST.hlsl)285
-rw-r--r--clang/test/AST/HLSL/Texture2D-vector-AST.hlsl726
-rw-r--r--clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp203
-rw-r--r--clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp3
-rw-r--r--clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp25
-rw-r--r--clang/test/CIR/CodeGen/new-delete.cpp164
-rw-r--r--clang/test/CodeGen/AArch64/neon-misc.c307
-rw-r--r--clang/test/CodeGen/AArch64/neon/fullfp16.c4
-rw-r--r--clang/test/CodeGen/AArch64/neon/intrinsics.c439
-rw-r--r--clang/test/CodeGen/amdgpu-abi-version.c59
-rw-r--r--clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl183
-rw-r--r--clang/test/CodeGenSYCL/function-attrs.cpp3
-rw-r--r--clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp277
-rw-r--r--clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp95
-rw-r--r--clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp2
-rw-r--r--clang/test/Driver/empty_arg.c2
-rw-r--r--clang/test/Modules/auto-module-import.m2
-rw-r--r--clang/test/Modules/extern_c.cpp2
-rw-r--r--clang/test/SemaHLSL/Texture2D-Gather.hlsl50
-rw-r--r--clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl23
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp64
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp142
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp5
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp4
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp8
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp7
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp6
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp188
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp88
-rw-r--r--clang/test/SemaSYCL/sycl-kernel-launch.cpp560
-rw-r--r--clang/tools/libclang/CXCursor.cpp1
-rw-r--r--flang-rt/lib/runtime/execute.cpp54
-rw-r--r--flang-rt/unittests/Runtime/CommandTest.cpp16
-rw-r--r--flang/docs/Extensions.md12
-rw-r--r--flang/include/flang/Parser/openmp-utils.h3
-rw-r--r--flang/include/flang/Support/Fortran-features.h2
-rw-r--r--flang/lib/Lower/OpenMP/Utils.cpp10
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp2
-rw-r--r--flang/lib/Parser/openmp-utils.cpp10
-rw-r--r--flang/lib/Parser/parse-tree.cpp8
-rw-r--r--flang/lib/Semantics/check-acc-structure.cpp6
-rw-r--r--flang/lib/Semantics/check-namelist.cpp8
-rw-r--r--flang/lib/Semantics/check-omp-loop.cpp71
-rw-r--r--flang/lib/Semantics/check-omp-structure.cpp18
-rw-r--r--flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir118
-rw-r--r--flang/test/Lower/OpenACC/acc-cache.f9016
-rw-r--r--flang/test/Semantics/OpenACC/acc-cache-validity.f906
-rw-r--r--flang/test/Semantics/namelist02.f9029
-rw-r--r--libc/shared/math.h1
-rw-r--r--libc/shared/math/ffmaf128.h29
-rw-r--r--libc/src/__support/FPUtil/generic/add_sub.h2
-rw-r--r--libc/src/__support/math/CMakeLists.txt11
-rw-r--r--libc/src/__support/math/asinpif.h22
-rw-r--r--libc/src/__support/math/ffmaf128.h34
-rw-r--r--libc/src/__support/math/inv_trigf_utils.h44
-rw-r--r--libc/src/math/generic/CMakeLists.txt3
-rw-r--r--libc/src/math/generic/ffmaf128.cpp6
-rw-r--r--libc/test/shared/CMakeLists.txt1
-rw-r--r--libc/test/shared/shared_math_test.cpp2
-rw-r--r--libc/test/src/math/smoke/AddTest.h17
-rw-r--r--libc/test/src/math/smoke/SubTest.h17
-rw-r--r--libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h17
-rw-r--r--libclc/clc/lib/amdgcn/SOURCES1
-rw-r--r--libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl14
-rw-r--r--libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl12
-rw-r--r--libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl24
-rw-r--r--libclc/opencl/lib/amdgcn/SOURCES1
-rw-r--r--libclc/opencl/lib/amdgcn/synchronization/barrier.cl17
-rw-r--r--libclc/opencl/lib/generic/SOURCES2
-rw-r--r--libclc/opencl/lib/generic/async/wait_group_events.cl2
-rw-r--r--libclc/opencl/lib/generic/synchronization/work_group_barrier.cl27
-rw-r--r--libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl14
-rw-r--r--libclc/opencl/lib/ptx-nvidiacl/SOURCES1
-rw-r--r--libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl17
-rw-r--r--libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp3
-rw-r--r--lldb/docs/use/variable.rst3
-rw-r--r--lldb/include/lldb/DataFormatters/TypeSynthetic.h23
-rw-r--r--lldb/include/lldb/lldb-enumerations.h3
-rw-r--r--lldb/source/DataFormatters/VectorType.cpp13
-rw-r--r--lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp3
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp9
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp8
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp8
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp12
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp9
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp13
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp9
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp12
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp13
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp9
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp9
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSArray.cpp34
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSDictionary.cpp83
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp13
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSSet.cpp51
-rw-r--r--lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h9
-rw-r--r--lldb/source/ValueObject/ValueObjectSynthetic.cpp30
-rw-r--r--lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py2
-rw-r--r--lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile2
-rw-r--r--lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py23
-rw-r--r--lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c12
-rw-r--r--lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py15
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py6
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/bytecode.test4
-rw-r--r--llvm/docs/AIToolPolicy.md12
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h235
-rw-r--r--llvm/include/llvm/IR/IntrinsicsDirectX.td12
-rw-r--r--llvm/lib/Option/OptTable.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td10
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp25
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoP.td72
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFuse.cpp10
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp10
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll475
-rw-r--r--llvm/test/CodeGen/RISCV/rv32p.ll31
-rw-r--r--llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll143
-rw-r--r--llvm/utils/gn/secondary/bolt/unittests/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn4
-rw-r--r--llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/lldb/test/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn1
-rw-r--r--mlir/include/mlir/Dialect/Arith/IR/ArithOps.td39
-rw-r--r--mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td47
-rw-r--r--mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td153
-rw-r--r--mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td2
-rw-r--r--mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td8
-rw-r--r--mlir/include/mlir/Dialect/Utils/IndexingUtils.h4
-rw-r--r--mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td74
-rw-r--r--mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h5
-rw-r--r--mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp36
-rw-r--r--mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td27
-rw-r--r--mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp236
-rw-r--r--mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp16
-rw-r--r--mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp157
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp18
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp7
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp3
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp5
-rw-r--r--mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir52
-rw-r--r--mlir/test/Dialect/Arith/canonicalize.mlir64
-rw-r--r--mlir/test/Dialect/Arith/ops.mlir28
-rw-r--r--mlir/test/Dialect/OpenACC/invalid-cg.mlir20
-rw-r--r--mlir/test/Dialect/OpenACC/ops-cg.mlir194
-rw-r--r--mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir6
-rw-r--r--mlir/test/Dialect/XeGPU/propagate-layout.mlir6
-rw-r--r--mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir5
-rw-r--r--mlir/test/Transforms/canonicalize.mlir2
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel13
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/BUILD.bazel1
226 files changed, 8276 insertions, 2020 deletions
diff --git a/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt b/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt
index 2f31d168e65c..9179e5dea4ea 100644
--- a/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt
@@ -8,7 +8,6 @@ add_clang_library(clangTidyHICPPModule STATIC
HICPPTidyModule.cpp
IgnoredRemoveResultCheck.cpp
MultiwayPathsCoveredCheck.cpp
- NoAssemblerCheck.cpp
SignedBitwiseCheck.cpp
LINK_LIBS
@@ -19,6 +18,7 @@ add_clang_library(clangTidyHICPPModule STATIC
clangTidyMiscModule
clangTidyModernizeModule
clangTidyPerformanceModule
+ clangTidyPortabilityModule
clangTidyReadabilityModule
clangTidyUtils
diff --git a/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp b/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp
index 2e0e64fbcd2a..a4601d9cdde9 100644
--- a/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp
@@ -30,6 +30,7 @@
#include "../modernize/UseOverrideCheck.h"
#include "../performance/MoveConstArgCheck.h"
#include "../performance/NoexceptMoveConstructorCheck.h"
+#include "../portability/NoAssemblerCheck.h"
#include "../readability/BracesAroundStatementsCheck.h"
#include "../readability/FunctionSizeCheck.h"
#include "../readability/NamedParameterCheck.h"
@@ -37,7 +38,6 @@
#include "ExceptionBaseclassCheck.h"
#include "IgnoredRemoveResultCheck.h"
#include "MultiwayPathsCoveredCheck.h"
-#include "NoAssemblerCheck.h"
#include "SignedBitwiseCheck.h"
namespace clang::tidy {
@@ -81,7 +81,8 @@ public:
CheckFactories
.registerCheck<cppcoreguidelines::ProBoundsArrayToPointerDecayCheck>(
"hicpp-no-array-decay");
- CheckFactories.registerCheck<NoAssemblerCheck>("hicpp-no-assembler");
+ CheckFactories.registerCheck<portability::NoAssemblerCheck>(
+ "hicpp-no-assembler");
CheckFactories.registerCheck<cppcoreguidelines::NoMallocCheck>(
"hicpp-no-malloc");
CheckFactories
diff --git a/clang-tools-extra/clang-tidy/portability/CMakeLists.txt b/clang-tools-extra/clang-tidy/portability/CMakeLists.txt
index 73d74a550afc..170fedf52130 100644
--- a/clang-tools-extra/clang-tidy/portability/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/portability/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
add_clang_library(clangTidyPortabilityModule STATIC
AvoidPragmaOnceCheck.cpp
+ NoAssemblerCheck.cpp
PortabilityTidyModule.cpp
RestrictSystemIncludesCheck.cpp
SIMDIntrinsicsCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.cpp
index e7d97b2a26b2..d9a20b97b233 100644
--- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp
+++ b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.cpp
@@ -11,7 +11,7 @@
using namespace clang::ast_matchers;
-namespace clang::tidy::hicpp {
+namespace clang::tidy::portability {
void NoAssemblerCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(asmStmt().bind("asm-stmt"), this);
@@ -34,4 +34,4 @@ void NoAssemblerCheck::check(const MatchFinder::MatchResult &Result) {
diag(ASMLocation, "do not use inline assembler in safety-critical code");
}
-} // namespace clang::tidy::hicpp
+} // namespace clang::tidy::portability
diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.h
index 15d646fd97af..2bc403e57a14 100644
--- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h
+++ b/clang-tools-extra/clang-tidy/portability/NoAssemblerCheck.h
@@ -6,17 +6,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_NOASSEMBLERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_NOASSEMBLERCHECK_H
#include "../ClangTidyCheck.h"
-namespace clang::tidy::hicpp {
+namespace clang::tidy::portability {
/// Find assembler statements. No fix is offered.
///
/// For the user-facing documentation see:
-/// https://clang.llvm.org/extra/clang-tidy/checks/hicpp/no-assembler.html
+/// https://clang.llvm.org/extra/clang-tidy/checks/portability/no-assembler.html
class NoAssemblerCheck : public ClangTidyCheck {
public:
NoAssemblerCheck(StringRef Name, ClangTidyContext *Context)
@@ -25,6 +25,6 @@ public:
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
};
-} // namespace clang::tidy::hicpp
+} // namespace clang::tidy::portability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_NOASSEMBLERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp
index fda997a2a3df..1f2340502f68 100644
--- a/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp
@@ -9,6 +9,7 @@
#include "../ClangTidy.h"
#include "../ClangTidyModule.h"
#include "AvoidPragmaOnceCheck.h"
+#include "NoAssemblerCheck.h"
#include "RestrictSystemIncludesCheck.h"
#include "SIMDIntrinsicsCheck.h"
#include "StdAllocatorConstCheck.h"
@@ -23,6 +24,7 @@ public:
void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
CheckFactories.registerCheck<AvoidPragmaOnceCheck>(
"portability-avoid-pragma-once");
+ CheckFactories.registerCheck<NoAssemblerCheck>("portability-no-assembler");
CheckFactories.registerCheck<RestrictSystemIncludesCheck>(
"portability-restrict-system-includes");
CheckFactories.registerCheck<SIMDIntrinsicsCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index b461f764eb0d..b0b4cd646c3b 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -157,6 +157,11 @@ New checks
New check aliases
^^^^^^^^^^^^^^^^^
+- Renamed :doc:`hicpp-no-assembler <clang-tidy/checks/hicpp/no-assembler>`
+ to :doc:`portability-no-assembler
+ <clang-tidy/checks/portability/no-assembler>`. The `hicpp-no-assembler`
+ name is kept as an alias.
+
Changes in existing checks
^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst b/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst
index 55231fbd0a8d..7987e40ba9e8 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/hicpp/no-assembler.rst
@@ -1,10 +1,10 @@
.. title:: clang-tidy - hicpp-no-assembler
+.. meta::
+ :http-equiv=refresh: 0;URL=../portability/no-assembler.html
hicpp-no-assembler
==================
-Checks for assembler statements. Use of inline assembly should be avoided since
-it restricts the portability of the code.
-
-This enforces `rule 7.5.1 <https://www.perforce.com/resources/qac/high-integrity-cpp-coding-rules>`_
-of the High Integrity C++ Coding Standard.
+The `hicpp-no-assembler` check is an alias, please see
+`portability-no-assembler <../portability/no-assembler.html>`_ for more
+information.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 4beea34f0c5a..068431fb5c94 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -243,7 +243,6 @@ Clang-Tidy Checks
:doc:`hicpp-exception-baseclass <hicpp/exception-baseclass>`,
:doc:`hicpp-ignored-remove-result <hicpp/ignored-remove-result>`,
:doc:`hicpp-multiway-paths-covered <hicpp/multiway-paths-covered>`,
- :doc:`hicpp-no-assembler <hicpp/no-assembler>`,
:doc:`hicpp-signed-bitwise <hicpp/signed-bitwise>`,
:doc:`linuxkernel-must-check-errs <linuxkernel/must-check-errs>`,
:doc:`llvm-header-guard <llvm/header-guard>`,
@@ -371,6 +370,7 @@ Clang-Tidy Checks
:doc:`performance-unnecessary-value-param <performance/unnecessary-value-param>`, "Yes"
:doc:`performance-use-std-move <performance/use-std-move>`, "Yes"
:doc:`portability-avoid-pragma-once <portability/avoid-pragma-once>`,
+ :doc:`portability-no-assembler <portability/no-assembler>`,
:doc:`portability-restrict-system-includes <portability/restrict-system-includes>`, "Yes"
:doc:`portability-simd-intrinsics <portability/simd-intrinsics>`,
:doc:`portability-std-allocator-const <portability/std-allocator-const>`,
@@ -608,6 +608,7 @@ Check aliases
:doc:`hicpp-named-parameter <hicpp/named-parameter>`, :doc:`readability-named-parameter <readability/named-parameter>`, "Yes"
:doc:`hicpp-new-delete-operators <hicpp/new-delete-operators>`, :doc:`misc-new-delete-overloads <misc/new-delete-overloads>`,
:doc:`hicpp-no-array-decay <hicpp/no-array-decay>`, :doc:`cppcoreguidelines-pro-bounds-array-to-pointer-decay <cppcoreguidelines/pro-bounds-array-to-pointer-decay>`,
+ :doc:`hicpp-no-assembler <hicpp/no-assembler>`, :doc:`portability-no-assembler <portability/no-assembler>`,
:doc:`hicpp-no-malloc <hicpp/no-malloc>`, :doc:`cppcoreguidelines-no-malloc <cppcoreguidelines/no-malloc>`,
:doc:`hicpp-noexcept-move <hicpp/noexcept-move>`, :doc:`performance-noexcept-move-constructor <performance/noexcept-move-constructor>`, "Yes"
:doc:`hicpp-special-member-functions <hicpp/special-member-functions>`, :doc:`cppcoreguidelines-special-member-functions <cppcoreguidelines/special-member-functions>`,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst b/clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst
new file mode 100644
index 000000000000..ddc24683d76d
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/portability/no-assembler.rst
@@ -0,0 +1,12 @@
+.. title:: clang-tidy - portability-no-assembler
+
+portability-no-assembler
+========================
+
+Checks for assembler statements. Use of inline assembly should be avoided
+since it ties the code to a specific CPU architecture and assembler syntax,
+making it non-portable across platforms.
+
+.. code-block:: c++
+
+ asm("mov al, 2"); // warning: do not use inline assembler in safety-critical code
diff --git a/clang-tools-extra/test/clang-tidy/checkers/hicpp/no-assembler.cpp b/clang-tools-extra/test/clang-tidy/checkers/portability/no-assembler.cpp
index d08ea74f6cde..0e589b65df1e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/hicpp/no-assembler.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/portability/no-assembler.cpp
@@ -1,12 +1,12 @@
-// RUN: %check_clang_tidy %s hicpp-no-assembler %t
+// RUN: %check_clang_tidy %s portability-no-assembler %t
__asm__(".symver foo, bar@v");
-// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: do not use inline assembler in safety-critical code [hicpp-no-assembler]
+// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: do not use inline assembler in safety-critical code [portability-no-assembler]
static int s asm("spam");
-// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use inline assembler in safety-critical code [hicpp-no-assembler]
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use inline assembler in safety-critical code [portability-no-assembler]
void f() {
__asm("mov al, 2");
- // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use inline assembler in safety-critical code [hicpp-no-assembler]
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use inline assembler in safety-critical code [portability-no-assembler]
}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp
new file mode 100644
index 000000000000..a58d6769ffc7
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-exception-variable-names.cpp
@@ -0,0 +1,22 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.IgnoredExceptionVariableNames: "^[ex]$"}}' \
+// RUN: -- -fexceptions
+
+struct myexcept { int val; };
+void doIt();
+
+void test() {
+ try {
+ doIt();
+ } catch (const myexcept &e) { doIt(); } // no warning, e allowed
+ try {
+ doIt();
+ } catch (const myexcept &x) { doIt(); } // no warning, x allowed
+ try {
+ doIt();
+ } catch (const myexcept &y)
+ // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: exception variable name 'y' is too short, expected at least 2 characters [readability-identifier-length]
+ {
+ doIt();
+ }
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp
new file mode 100644
index 000000000000..2d2049bbdc6d
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-loop-counter-names.cpp
@@ -0,0 +1,14 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.IgnoredLoopCounterNames: "^[ijk]$"}}' \
+// RUN: -- -fexceptions
+
+void doIt();
+
+void test() {
+ for (int i = 0; i < 5; ++i) { doIt(); } // no warning, i allowed
+ for (int m = 0; m < 5; ++m)
+ // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: loop variable name 'm' is too short, expected at least 2 characters [readability-identifier-length]
+ {
+ doIt();
+ }
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp
new file mode 100644
index 000000000000..65a45f8bb37b
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-ignored-parameter-names.cpp
@@ -0,0 +1,11 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.IgnoredParameterNames: "^[ab]$"}}' \
+// RUN: -- -fexceptions
+
+void test(int a, int b, int c)
+// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: parameter name 'c' is too short, expected at least 3 characters [readability-identifier-length]
+{
+ (void)a;
+ (void)b;
+ (void)c;
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp
new file mode 100644
index 000000000000..d108b115e29d
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-exception-name-length.cpp
@@ -0,0 +1,16 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumExceptionNameLength: 4}}' \
+// RUN: -- -fexceptions
+
+struct myexcept { int val; };
+void doIt();
+
+void test() {
+ try {
+ doIt();
+ } catch (const myexcept &err)
+ // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: exception variable name 'err' is too short, expected at least 4 characters [readability-identifier-length]
+ {
+ doIt();
+ }
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp
new file mode 100644
index 000000000000..a11a6da0085b
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-loop-counter-name-length.cpp
@@ -0,0 +1,14 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumLoopCounterNameLength: 4}}' \
+// RUN: -- -fexceptions
+
+void doIt();
+
+void test() {
+ for (int idx = 0; idx < 5; ++idx)
+ // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: loop variable name 'idx' is too short, expected at least 4 characters [readability-identifier-length]
+ {
+ doIt();
+ }
+ for (int index = 0; index < 5; ++index) { doIt(); } // 5 chars, ok
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp
new file mode 100644
index 000000000000..afe53896fe00
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-parameter-name-length.cpp
@@ -0,0 +1,9 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumParameterNameLength: 5}}' \
+// RUN: -- -fexceptions
+
+void test(int data)
+// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: parameter name 'data' is too short, expected at least 5 characters [readability-identifier-length]
+{
+ (void)data;
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp
new file mode 100644
index 000000000000..91d95beb42f7
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-length-minimum-variable-name-length.cpp
@@ -0,0 +1,11 @@
+// RUN: %check_clang_tidy %s readability-identifier-length %t \
+// RUN: -config='{CheckOptions: {readability-identifier-length.MinimumVariableNameLength: 5}}' \
+// RUN: -- -fexceptions
+
+void doIt();
+
+void test() {
+ int valu = 5;
+ // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: variable name 'valu' is too short, expected at least 5 characters [readability-identifier-length]
+ int value = 6; // 5 chars, ok
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp
new file mode 100644
index 000000000000..109f636c0290
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/magic-numbers-ignore-all-float.cpp
@@ -0,0 +1,8 @@
+// RUN: %check_clang_tidy %s readability-magic-numbers %t -check-suffix=IGNFP \
+// RUN: -config='{CheckOptions: {readability-magic-numbers.IgnoreAllFloatingPointValues: true}}' --
+
+int BadInt = 5;
+// CHECK-MESSAGES-IGNFP: :[[@LINE-1]]:14: warning: 5 is a magic number; consider replacing it with a named constant [readability-magic-numbers]
+
+float IgnoredFloat = 3.14f;
+// CHECK-MESSAGES-IGNFP-NOT: 3.14f is a magic number
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp
new file mode 100644
index 000000000000..c8ed9e09eb86
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/qualified-auto-add-const-to-qualified.cpp
@@ -0,0 +1,12 @@
+// RUN: %check_clang_tidy %s readability-qualified-auto %t --
+// RUN: %check_clang_tidy %s readability-qualified-auto %t -check-suffix=NOCONST \
+// RUN: -config='{CheckOptions: { readability-qualified-auto.AddConstToQualified: false }}' --
+
+const int *getCIntPtr();
+
+void foo() {
+ auto *QualCPtr = getCIntPtr();
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'auto *QualCPtr' can be declared as 'const auto *QualCPtr'
+ // CHECK-FIXES: const auto *QualCPtr = getCIntPtr();
+ // No warning for NOCONST
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp
new file mode 100644
index 000000000000..de0366ba1540
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses-allowed-decls.cpp
@@ -0,0 +1,16 @@
+// RUN: %check_clang_tidy %s readability-redundant-parentheses %t \
+// RUN: -config='{CheckOptions: {readability-redundant-parentheses.AllowedDecls: ""}}'
+
+namespace std {
+ template<class T> T max(T, T);
+ template<class T> T min(T, T);
+} // namespace std
+
+void foo() {
+ (std::max)(1, 2);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant parentheses around expression [readability-redundant-parentheses]
+ // CHECK-FIXES: std::max(1, 2);
+ (std::min)(1, 2);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant parentheses around expression [readability-redundant-parentheses]
+ // CHECK-FIXES: std::min(1, 2);
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp
index 7788feef8ce2..a6ab8fea1fab 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument-option.cpp
@@ -1,7 +1,16 @@
// RUN: %check_clang_tidy %s readability-suspicious-call-argument %t \
// RUN: -config="{CheckOptions: {readability-suspicious-call-argument.Abbreviations: 'crash='}}" -- -std=c++11-or-later
+// RUN: %check_clang_tidy %s readability-suspicious-call-argument %t -check-suffix=MINLEN \
+// RUN: -config='{CheckOptions: {readability-suspicious-call-argument.MinimumIdentifierNameLength: 10}}' -- -std=c++11-or-later
void f() {}
// CHECK-MESSAGES: warning: Invalid abbreviation configuration 'crash=', ignoring.
-// TODO: Add testcases for other options
+void takeTwoParams(int frobble1, int frobble2);
+
+void testMinimumIdentifierNameLength() {
+ int frobble2 = 1, frobble1 = 2;
+ takeTwoParams(frobble2, frobble1);
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 1st argument 'frobble2' (passed to 'frobble1') looks like it might be swapped with the 2nd, 'frobble1' (passed to 'frobble2')
+ // No warning for MINLEN
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp
new file mode 100644
index 000000000000..0e17db3fe836
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/use-std-min-max-include-style.cpp
@@ -0,0 +1,13 @@
+// RUN: %check_clang_tidy -std=c++11-or-later %s readability-use-std-min-max %t \
+// RUN: -config='{CheckOptions: {readability-use-std-min-max.IncludeStyle: "google"}}' \
+// RUN: -- -fno-delayed-template-parsing
+
+// CHECK-FIXES: #include <algorithm>
+
+void foo() {
+ int a = 0, b = 1;
+ if (a < b)
+ a = b;
+ // CHECK-MESSAGES: :[[@LINE-2]]:3: warning: use `std::max` instead of `<` [readability-use-std-min-max]
+ // CHECK-FIXES: a = std::max(a, b);
+}
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h
index b438a9b250b0..e7aa6c26dfce 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -839,8 +839,10 @@ public:
void VisitSYCLKernelCallStmt(const SYCLKernelCallStmt *Node) {
Visit(Node->getOriginalStmt());
- if (Traversal != TK_IgnoreUnlessSpelledInSource)
+ if (Traversal != TK_IgnoreUnlessSpelledInSource) {
+ Visit(Node->getKernelLaunchStmt());
Visit(Node->getOutlinedFunctionDecl());
+ }
}
void VisitOMPExecutableDirective(const OMPExecutableDirective *Node) {
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index f97b54276cbe..ce6ad723191e 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3001,6 +3001,13 @@ DEF_TRAVERSE_STMT(ParenListExpr, {})
DEF_TRAVERSE_STMT(SYCLUniqueStableNameExpr, {
TRY_TO(TraverseTypeLoc(S->getTypeSourceInfo()->getTypeLoc()));
})
+DEF_TRAVERSE_STMT(UnresolvedSYCLKernelCallStmt, {
+ if (getDerived().shouldVisitImplicitCode()) {
+ TRY_TO(TraverseStmt(S->getOriginalStmt()));
+ TRY_TO(TraverseStmt(S->getKernelLaunchIdExpr()));
+ ShouldVisitChildren = false;
+ }
+})
DEF_TRAVERSE_STMT(OpenACCAsteriskSizeExpr, {})
DEF_TRAVERSE_STMT(PredefinedExpr, {})
DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
@@ -3038,6 +3045,7 @@ DEF_TRAVERSE_STMT(CapturedStmt, { TRY_TO(TraverseDecl(S->getCapturedDecl())); })
DEF_TRAVERSE_STMT(SYCLKernelCallStmt, {
if (getDerived().shouldVisitImplicitCode()) {
TRY_TO(TraverseStmt(S->getOriginalStmt()));
+ TRY_TO(TraverseStmt(S->getKernelLaunchStmt()));
TRY_TO(TraverseDecl(S->getOutlinedFunctionDecl()));
ShouldVisitChildren = false;
}
diff --git a/clang/include/clang/AST/StmtSYCL.h b/clang/include/clang/AST/StmtSYCL.h
index 28ace12d7916..79ac88532e14 100644
--- a/clang/include/clang/AST/StmtSYCL.h
+++ b/clang/include/clang/AST/StmtSYCL.h
@@ -28,40 +28,44 @@ namespace clang {
/// of such a function specifies the statements to be executed on a SYCL device
/// to invoke a SYCL kernel with a particular set of kernel arguments. The
/// SYCLKernelCallStmt associates an original statement (the compound statement
-/// that is the function body) with an OutlinedFunctionDecl that holds the
-/// kernel parameters and the transformed body. During code generation, the
-/// OutlinedFunctionDecl is used to emit an offload kernel entry point suitable
-/// for invocation from a SYCL library implementation. If executed, the
-/// SYCLKernelCallStmt behaves as a no-op; no code generation is performed for
-/// it.
+/// that is the function body) with a kernel launch statement to execute on a
+/// SYCL host and an OutlinedFunctionDecl that holds the kernel parameters and
+/// the transformed body to execute on a SYCL device. During code generation,
+/// the OutlinedFunctionDecl is used to emit an offload kernel entry point
+/// suitable for invocation from a SYCL library implementation.
class SYCLKernelCallStmt : public Stmt {
friend class ASTStmtReader;
friend class ASTStmtWriter;
private:
Stmt *OriginalStmt = nullptr;
+ Stmt *KernelLaunchStmt = nullptr;
OutlinedFunctionDecl *OFDecl = nullptr;
public:
/// Construct a SYCL kernel call statement.
- SYCLKernelCallStmt(CompoundStmt *CS, OutlinedFunctionDecl *OFD)
- : Stmt(SYCLKernelCallStmtClass), OriginalStmt(CS), OFDecl(OFD) {}
+ SYCLKernelCallStmt(CompoundStmt *CS, Stmt *S, OutlinedFunctionDecl *OFD)
+ : Stmt(SYCLKernelCallStmtClass), OriginalStmt(CS), KernelLaunchStmt(S),
+ OFDecl(OFD) {}
/// Construct an empty SYCL kernel call statement.
SYCLKernelCallStmt(EmptyShell Empty) : Stmt(SYCLKernelCallStmtClass, Empty) {}
- /// Retrieve the model statement.
CompoundStmt *getOriginalStmt() { return cast<CompoundStmt>(OriginalStmt); }
const CompoundStmt *getOriginalStmt() const {
return cast<CompoundStmt>(OriginalStmt);
}
+
void setOriginalStmt(CompoundStmt *CS) { OriginalStmt = CS; }
- /// Retrieve the outlined function declaration.
+ Stmt *getKernelLaunchStmt() { return KernelLaunchStmt; }
+ const Stmt *getKernelLaunchStmt() const { return KernelLaunchStmt; }
+
+ void setKernelLaunchStmt(Stmt *S) { KernelLaunchStmt = S; }
+
OutlinedFunctionDecl *getOutlinedFunctionDecl() { return OFDecl; }
const OutlinedFunctionDecl *getOutlinedFunctionDecl() const { return OFDecl; }
- /// Set the outlined function declaration.
void setOutlinedFunctionDecl(OutlinedFunctionDecl *OFD) { OFDecl = OFD; }
SourceLocation getBeginLoc() const LLVM_READONLY {
@@ -89,6 +93,66 @@ public:
}
};
+/// UnresolvedSYCLKernelCallStmt represents an invocation of a SYCL kernel in
+/// a dependent context for which lookup of the sycl_kernel_launch identifier
+/// cannot be performed. These statements are transformed to SYCLKernelCallStmt
+/// during template instantiation.
+class UnresolvedSYCLKernelCallStmt : public Stmt {
+ friend class ASTStmtReader;
+ friend class ASTStmtWriter;
+
+private:
+ Stmt *OriginalStmt = nullptr;
+ /// KernelLaunchIdExpr stores an UnresolvedLookupExpr or UnresolvedMemberExpr
+ /// corresponding to the SYCL kernel launch function for which a call
+ /// will be synthesized during template instantiation.
+ Expr *KernelLaunchIdExpr = nullptr;
+
+ UnresolvedSYCLKernelCallStmt(CompoundStmt *CS, Expr *IdExpr)
+ : Stmt(UnresolvedSYCLKernelCallStmtClass), OriginalStmt(CS),
+ KernelLaunchIdExpr(IdExpr) {}
+
+ void setOriginalStmt(CompoundStmt *CS) { OriginalStmt = CS; }
+
+ void setKernelLaunchIdExpr(Expr *IdExpr) { KernelLaunchIdExpr = IdExpr; }
+
+public:
+ static UnresolvedSYCLKernelCallStmt *Create(const ASTContext &C,
+ CompoundStmt *CS, Expr *IdExpr) {
+ return new (C) UnresolvedSYCLKernelCallStmt(CS, IdExpr);
+ }
+
+ static UnresolvedSYCLKernelCallStmt *CreateEmpty(const ASTContext &C) {
+ return new (C) UnresolvedSYCLKernelCallStmt(nullptr, nullptr);
+ }
+
+ CompoundStmt *getOriginalStmt() { return cast<CompoundStmt>(OriginalStmt); }
+ const CompoundStmt *getOriginalStmt() const {
+ return cast<CompoundStmt>(OriginalStmt);
+ }
+
+ Expr *getKernelLaunchIdExpr() { return KernelLaunchIdExpr; }
+ const Expr *getKernelLaunchIdExpr() const { return KernelLaunchIdExpr; }
+
+ SourceLocation getBeginLoc() const LLVM_READONLY {
+ return getOriginalStmt()->getBeginLoc();
+ }
+
+ SourceLocation getEndLoc() const LLVM_READONLY {
+ return getOriginalStmt()->getEndLoc();
+ }
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == UnresolvedSYCLKernelCallStmtClass;
+ }
+ child_range children() {
+ return child_range(&OriginalStmt, &OriginalStmt + 1);
+ }
+
+ const_child_range children() const {
+ return const_child_range(&OriginalStmt, &OriginalStmt + 1);
+ }
+};
+
} // end namespace clang
#endif // LLVM_CLANG_AST_STMTSYCL_H
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 68fdc3b976d6..60dfdfc2f23f 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -580,25 +580,26 @@ The following examples demonstrate the use of this attribute:
def SYCLKernelEntryPointDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
-The ``sycl_kernel_entry_point`` attribute facilitates the generation of an
-offload kernel entry point, sometimes called a SYCL kernel caller function,
-suitable for invoking a SYCL kernel on an offload device. The attribute is
-intended for use in the implementation of SYCL kernel invocation functions
-like the ``single_task`` and ``parallel_for`` member functions of the
-``sycl::handler`` class specified in section 4.9.4, "Command group ``handler``
-class", of the SYCL 2020 specification.
-
-The attribute requires a single type argument that specifies a class type that
-meets the requirements for a SYCL kernel name as described in section 5.2,
-"Naming of kernels", of the SYCL 2020 specification. A unique kernel name type
-is required for each function declared with the attribute. The attribute may
-not first appear on a declaration that follows a definition of the function.
+The ``sycl_kernel_entry_point`` attribute facilitates the launch of a SYCL
+kernel and the generation of an offload kernel entry point, sometimes called
+a SYCL kernel caller function, suitable for invoking a SYCL kernel on an
+offload device. The attribute is intended for use in the implementation of
+SYCL kernel invocation functions like the ``single_task`` and ``parallel_for``
+member functions of the ``sycl::handler`` class specified in section 4.9.4,
+"Command group ``handler`` class", of the SYCL 2020 specification.
+
+The attribute requires a single type argument that meets the requirements for
+a SYCL kernel name as described in section 5.2, "Naming of kernels", of the
+SYCL 2020 specification. A unique kernel name type is required for each
+function declared with the attribute. The attribute may not first appear on a
+declaration that follows a definition of the function.
The attribute only appertains to functions and only those that meet the
following requirements.
* Has a non-deduced ``void`` return type.
-* Is not a non-static member function, constructor, or destructor.
+* Is not a constructor or destructor.
+* Is not a non-static member function with an explicit object parameter.
* Is not a C variadic function.
* Is not a coroutine.
* Is not defined as deleted or as defaulted.
@@ -613,73 +614,84 @@ follows.
namespace sycl {
class handler {
- template<typename KernelNameType, typename KernelType>
- [[ clang::sycl_kernel_entry_point(KernelNameType) ]]
- static void kernel_entry_point(KernelType kernel) {
- kernel();
+ template<typename KernelName, typename... Ts>
+ void sycl_kernel_launch(const char* kernelSymbol, Ts&&... kernelArgs) {
+ // This code will run on the host and is responsible for calling functions
+ // appropriate for the desired offload backend (OpenCL, CUDA, HIP,
+ // Level Zero, etc...) to copy the kernel arguments denoted by kernelArgs
+ // to a device and to schedule an invocation of the offload kernel entry
+ // point denoted by kernelSymbol with the copied arguments.
+ }
+
+ template<typename KernelName, typename KernelType>
+ [[ clang::sycl_kernel_entry_point(KernelName) ]]
+ void kernel_entry_point(KernelType kernelFunc) {
+ // This code will run on the device. The call to kernelFunc() invokes
+ // the SYCL kernel.
+ kernelFunc();
}
public:
- template<typename KernelNameType, typename KernelType>
- void single_task(KernelType kernel) {
- // Call kernel_entry_point() to trigger generation of an offload
- // kernel entry point.
- kernel_entry_point<KernelNameType>(kernel);
- // Call functions appropriate for the desired offload backend
- // (OpenCL, CUDA, HIP, Level Zero, etc...).
+ template<typename KernelName, typename KernelType>
+ void single_task(const KernelType& kernelFunc) {
+ // This code will run on the host. kernel_entry_point() is called to
+ // trigger generation of an offload kernel entry point and to schedule
+ // an invocation of it on a device with kernelFunc (a SYCL kernel object)
+ // passed as a kernel argument. This call will result in an implicit call
+ // to sycl_kernel_launch() with the symbol name for the generated offload
+ // kernel entry point passed as the first function argument followed by
+ // kernelFunc.
+ kernel_entry_point<KernelName>(kernelFunc);
}
};
} // namespace sycl
-A SYCL kernel is a callable object of class type that is constructed on a host,
-often via a lambda expression, and then passed to a SYCL kernel invocation
-function to be executed on an offload device. A SYCL kernel invocation function
-is responsible for copying the provided SYCL kernel object to an offload
-device and initiating a call to it. The SYCL kernel object and its data members
-constitute the parameters of an offload kernel.
-
-A SYCL kernel type is required to satisfy the device copyability requirements
-specified in section 3.13.1, "Device copyable", of the SYCL 2020 specification.
-Additionally, any data members of the kernel object type are required to satisfy
-section 4.12.4, "Rules for parameter passing to kernels". For most types, these
-rules require that the type is trivially copyable. However, the SYCL
-specification mandates that certain special SYCL types, such as
-``sycl::accessor`` and ``sycl::stream`` be device copyable even if they are not
-trivially copyable. These types require special handling because they cannot
-be copied to device memory as if by ``memcpy()``. Additionally, some offload
-backends, OpenCL for example, require objects of some of these types to be
-passed as individual arguments to the offload kernel.
-
-An offload kernel consists of an entry point function that declares the
-parameters of the offload kernel and the set of all functions and variables that
-are directly or indirectly used by the entry point function.
-
-A SYCL kernel invocation function invokes a SYCL kernel on a device by
-performing the following tasks (likely with the help of an offload backend
-like OpenCL):
+A SYCL kernel object is a callable object of class type that is constructed on
+a host, often via a lambda expression, and then passed to a SYCL kernel
+invocation function to be executed on an offload device. The ``kernelFunc``
+parameters in the example code above correspond to SYCL kernel objects.
+
+A SYCL kernel object type is required to satisfy the device copyability
+requirements specified in section 3.13.1, "Device copyable", of the SYCL 2020
+specification. Additionally, any data members of the kernel object type are
+required to satisfy section 4.12.4, "Rules for parameter passing to kernels".
+For most types, these rules require that the type is trivially copyable.
+However, the SYCL specification mandates that certain special SYCL types, such
+as ``sycl::accessor`` and ``sycl::stream``, be device copyable even if they are
+not trivially copyable. These types require special handling because they cannot
+necessarily be copied to device memory as if by ``memcpy()``.
+
+The SYCL kernel object and its data members constitute the parameters of an
+offload kernel. An offload kernel consists of an offload entry point function
+and the set of all functions and variables that are directly or indirectly used
+by the entry point function.
+
+A SYCL kernel invocation function is responsible for performing the following
+tasks (likely with the help of an offload backend like OpenCL):
#. Identifying the offload kernel entry point to be used for the SYCL kernel.
-#. Deconstructing the SYCL kernel object, if necessary, to produce the set of
- offload kernel arguments required by the offload kernel entry point.
+#. Validating that the SYCL kernel object type and its data members meet the
+ SYCL device copyability and kernel parameter requirements noted above.
-#. Copying the offload kernel arguments to device memory.
+#. Copying the SYCL kernel object and any other kernel arguments to device
+ memory including any special handling required for SYCL special types.
#. Initiating execution of the offload kernel entry point.
The offload kernel entry point for a SYCL kernel performs the following tasks:
-#. Reconstituting the SYCL kernel object, if necessary, using the offload
- kernel parameters.
+#. Calling the ``operator()`` member function of the SYCL kernel object.
-#. Calling the ``operator()`` member function of the (reconstituted) SYCL kernel
- object.
+The ``sycl_kernel_entry_point`` attribute facilitates or automates these tasks
+by providing generation of an offload kernel entry point with a unique symbol
+name, type checking of kernel argument requirements, and initiation of kernel
+execution via synthesized calls to a ``sycl_kernel_launch`` template.
-The ``sycl_kernel_entry_point`` attribute automates generation of an offload
-kernel entry point that performs those latter tasks. The parameters and body of
-a function declared with the ``sycl_kernel_entry_point`` attribute specify a
-pattern from which the parameters and body of the entry point function are
-derived. Consider the following call to a SYCL kernel invocation function.
+A function declared with the ``sycl_kernel_entry_point`` attribute specifies
+the parameters and body of an offload entry point function. Consider the
+following call to the ``single_task()`` SYCL kernel invocation function assuming
+an implementation similar to the one shown above.
.. code-block:: c++
@@ -690,65 +702,87 @@ derived. Consider the following call to a SYCL kernel invocation function.
});
}
-The SYCL kernel object is the result of the lambda expression. It has two
-data members corresponding to the captures of ``sout`` and ``s``. Since one
-of these data members corresponds to a special SYCL type that must be passed
-individually as an offload kernel parameter, it is necessary to decompose the
-SYCL kernel object into its constituent parts; the offload kernel will have
-two kernel parameters. Given a SYCL implementation that uses a
-``sycl_kernel_entry_point`` attributed function like the one shown above, an
-offload kernel entry point function will be generated that looks approximately
+The SYCL kernel object is the result of the lambda expression. The call to
+``kernel_entry_point()`` via the call to ``single_task()`` triggers the
+generation of an offload kernel entry point function that looks approximately
as follows.
.. code-block:: c++
- void sycl-kernel-caller-for-KN(sycl::stream sout, S s) {
- kernel-type kernel = { sout, s );
- kernel();
+ void sycl-kernel-caller-for-KN(kernel-type kernelFunc) {
+ kernelFunc();
}
There are a few items worthy of note:
-#. The name of the generated function incorporates the SYCL kernel name,
- ``KN``, that was passed as the ``KernelNameType`` template parameter to
- ``kernel_entry_point()`` and provided as the argument to the
- ``sycl_kernel_entry_point`` attribute. There is a one-to-one correspondence
- between SYCL kernel names and offload kernel entry points.
+#. ``sycl-kernel-caller-for-KN`` is an exposition-only name; the actual name
+ generated for an entry point is an implementation detail and subject to
+ change. However, the name will incorporate the SYCL kernel name, ``KN``,
+ that was passed as the ``KernelName`` template parameter to
+ ``single_task()`` and eventually provided as the argument to the
+ ``sycl_kernel_entry_point`` attribute in order to ensure that a unique
+ name is generated for each entry point. There is a one-to-one correspondence
+ between SYCL kernel names and offload kernel entry points.
#. The SYCL kernel is a lambda closure type and therefore has no name;
``kernel-type`` is substituted above and corresponds to the ``KernelType``
- template parameter deduced in the call to ``kernel_entry_point()``.
- Lambda types cannot be declared and initialized using the aggregate
- initialization syntax used above, but the intended behavior should be clear.
+ template parameter deduced in the call to ``single_task()``.
+
+#. The parameter and the call to ``kernelFunc()`` in the function body
+ correspond to the definition of ``kernel_entry_point()`` as called by
+ ``single_task()``.
-#. ``S`` is a device copyable type that does not directly or indirectly contain
- a data member of a SYCL special type. It therefore does not need to be
- decomposed into its constituent members to be passed as a kernel argument.
+#. The parameter is type checked for conformance with the SYCL device
+ copyability and kernel parameter requirements.
-#. The depiction of the ``sycl::stream`` parameter as a single self contained
- kernel parameter is an oversimplification. SYCL special types may require
- additional decomposition such that the generated function might have three
- or more parameters depending on how the SYCL library implementation defines
- these types.
+Within ``single_task()``, the call to ``kernel_entry_point()`` is effectively
+replaced with a synthesized call to a ``sycl_kernel_launch`` template that
+looks approximately as follows.
-#. The call to ``kernel_entry_point()`` has no effect other than to trigger
- emission of the entry point function. The statments that make up the body
- of the function are not executed when the function is called; they are
- only used in the generation of the entry point function.
+.. code-block:: c++
+
+ sycl_kernel_launch<KN>("sycl-kernel-caller-for-KN", kernelFunc);
+
+There are a few items worthy of note:
+
+#. Lookup for the ``sycl_kernel_launch`` template is performed as if from the
+ body of the (possibly instantiated) definition of ``kernel_entry_point()``.
+ If name lookup or overload resolution fails, the program is ill-formed.
+ If the selected overload is a non-static member function, then ``this`` is
+ passed as the implicit object parameter.
+
+#. Function arguments passed to ``sycl_kernel_launch()`` are passed
+ as if by ``std::move(x)``.
+
+#. The ``sycl_kernel_launch`` template is expected to be provided by the SYCL
+ library implementation. It is responsible for copying the kernel arguments
+ to device memory and for scheduling execution of the generated offload
+ kernel entry point identified by the symbol name passed as the first
+ function argument. ``sycl-kernel-caller-for-KN`` is substituted above for
+ the actual symbol name that would be generated for the offload kernel entry
+ point.
It is not necessary for a function declared with the ``sycl_kernel_entry_point``
attribute to be called for the offload kernel entry point to be emitted. For
inline functions and function templates, any ODR-use will suffice. For other
functions, an ODR-use is not required; the offload kernel entry point will be
-emitted if the function is defined.
+emitted if the function is defined. In any case, a call to the function is
+required for the synthesized call to ``sycl_kernel_launch()`` to occur.
+
+A function declared with the ``sycl_kernel_entry_point`` attribute may include
+an exception specification. If a non-throwing exception specification is
+present, an exception propagating from the implicit call to the
+``sycl_kernel_launch`` template will result in a call to ``std::terminate()``.
+Otherwise, such an exception will propagate normally.
Functions declared with the ``sycl_kernel_entry_point`` attribute are not
limited to the simple example shown above. They may have additional template
parameters, declare additional function parameters, and have complex control
-flow in the function body. Function parameter decomposition and reconstitution
-is performed for all function parameters. The function must abide by the
-language feature restrictions described in section 5.4, "Language restrictions
-for device functions" in the SYCL 2020 specification.
+flow in the function body. The function must abide by the language feature
+restrictions described in section 5.4, "Language restrictions for device
+functions" in the SYCL 2020 specification. If the function is a non-static
+member function, ``this`` shall not be used in a potentially evaluated
+expression.
}];
}
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 7063d7c06c4c..62421e588814 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5072,6 +5072,18 @@ def HLSLResourceSampleCmpLevelZero : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLResourceGather : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_resource_gather"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(...)";
+}
+
+def HLSLResourceGatherCmp : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_resource_gather_cmp"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_uninitializedhandle"];
let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 2f8a37e50613..99ab83f5e021 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12735,8 +12735,7 @@ def note_unreachable_entity : Note<
"is not %select{visible|reachable|reachable|reachable|reachable|reachable}0">;
def ext_module_import_in_extern_c : ExtWarn<
"import of C++ module '%0' appears within extern \"C\" language linkage "
- "specification">, DefaultError,
- InGroup<DiagGroup<"module-import-in-extern-c">>;
+ "specification">, InGroup<DiagGroup<"module-import-in-extern-c">>;
def err_module_import_not_at_top_level_fatal : Error<
"import of module '%0' appears within %1">, DefaultFatal;
def ext_module_import_not_at_top_level_noop : ExtWarn<
@@ -13353,19 +13352,24 @@ def warn_sycl_external_missing_on_first_decl : Warning<
// SYCL kernel entry point diagnostics
def err_sycl_entry_point_invalid : Error<
"the %0 attribute cannot be applied to a %enum_select<InvalidSKEPReason>{"
- "%NonStaticMemberFn{non-static member function}|"
"%VariadicFn{variadic function}|"
"%DeletedFn{deleted function}|"
"%DefaultedFn{defaulted function}|"
+ "%Constructor{constructor}|"
+ "%Destructor{destructor}|"
+ "%Coroutine{coroutine}|"
"%ConstexprFn{constexpr function}|"
"%ConstevalFn{consteval function}|"
"%NoreturnFn{function declared with the 'noreturn' attribute}|"
- "%Coroutine{coroutine}|"
- "%FunctionTryBlock{function defined with a function try block}"
+ "%FunctionTryBlock{function defined with a function try block}|"
+ "%ExplicitObjectFn{function with an explicit object parameter}"
"}1">;
def err_sycl_entry_point_invalid_redeclaration : Error<
"the %0 kernel name argument does not match prior"
" declaration%diff{: $ vs $|}1,2">;
+def err_sycl_entry_point_invalid_this : Error<
+ "'this' cannot be%select{| implicitly}0 used in a potentially evaluated"
+ " expression in the body of a function declared with the %1 attribute">;
def err_sycl_kernel_name_conflict : Error<
"the %0 kernel name argument conflicts with a previous declaration">;
def warn_sycl_kernel_name_not_a_class_type : Warning<
@@ -13381,6 +13385,18 @@ def err_sycl_entry_point_return_type : Error<
def err_sycl_entry_point_deduced_return_type : Error<
"the %0 attribute only applies to functions with a non-deduced 'void' return"
" type">;
+def note_sycl_runtime_defect : Note<
+ "this indicates a problem with the SYCL runtime header files; please consider"
+ " reporting this to your SYCL runtime provider">;
+def note_sycl_kernel_launch_lookup_here : Note<
+ "in implicit call to 'sycl_kernel_launch' with template argument %0 required"
+ " here">;
+def note_sycl_kernel_launch_overload_resolution_here : Note<
+ "in implicit call to 'sycl_kernel_launch' with template argument %0 and"
+ " function arguments %1 required here">;
+def err_sycl_entry_point_device_use : Error<
+ "function %0 cannot be used in device code because it is declared with the"
+ " %1 attribute">;
def warn_cuda_maxclusterrank_sm_90 : Warning<
"maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
@@ -13625,6 +13641,8 @@ def err_hlsl_push_constant_unique
def err_hlsl_samplecmp_requires_float
: Error<"'SampleCmp' and 'SampleCmpLevelZero' require resource to contain "
"a floating point type">;
+def err_hlsl_gathercmp_invalid_component
+ : Error<"gatherCmp%select{Red|Green|Blue|Alpha}0 operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed">;
// Layout randomization diagnostics.
def err_non_designated_init_used : Error<
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index cb869cc21062..b196382025c9 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -24,6 +24,7 @@ def CaseStmt : StmtNode<SwitchCase>;
def DefaultStmt : StmtNode<SwitchCase>;
def CapturedStmt : StmtNode<Stmt>;
def SYCLKernelCallStmt : StmtNode<Stmt>;
+def UnresolvedSYCLKernelCallStmt : StmtNode<Stmt>;
// Break/continue.
def LoopControlStmt : StmtNode<Stmt, 1>;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index d206503d914f..1e3a2c9af35d 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -291,7 +291,6 @@ struct MissingFeatures {
static bool handleBuiltinICEArguments() { return false; }
static bool hip() { return false; }
static bool incrementProfileCounter() { return false; }
- static bool innermostEHScope() { return false; }
static bool insertBuiltinUnpredictable() { return false; }
static bool instrumentation() { return false; }
static bool intrinsicElementTypeSupport() { return false; }
@@ -348,6 +347,7 @@ struct MissingFeatures {
static bool targetCodeGenInfoGetNullPointer() { return false; }
static bool thunks() { return false; }
static bool tryEmitAsConstant() { return false; }
+ static bool typeAwareAllocation() { return false; }
static bool typeChecks() { return false; }
static bool useEHCleanupForArray() { return false; }
static bool vaArgABILowering() { return false; }
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 217efa3fe756..266e0826b38f 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -197,6 +197,14 @@ class CompilerInstance : public ModuleLoader {
/// Force an output buffer.
std::unique_ptr<llvm::raw_pwrite_stream> OutputStream;
+ using GenModuleActionWrapperFunc =
+ std::function<std::unique_ptr<FrontendAction>(
+ const FrontendOptions &, std::unique_ptr<FrontendAction>)>;
+
+ /// An optional callback function used to wrap all FrontendActions
+ /// produced to generate imported modules before they are executed.
+ GenModuleActionWrapperFunc GenModuleActionWrapper;
+
CompilerInstance(const CompilerInstance &) = delete;
void operator=(const CompilerInstance &) = delete;
public:
@@ -958,6 +966,14 @@ public:
bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) override;
+ /// Installs the optional callback used to wrap the FrontendActions that
+ /// are produced to generate imported modules. The parameter is taken by
+ /// value and moved into the member so that no extra std::function copy is
+ /// made.
+ void setGenModuleActionWrapper(GenModuleActionWrapperFunc Wrapper) {
+ GenModuleActionWrapper = std::move(Wrapper);
+ }
+
+ GenModuleActionWrapperFunc getGenModuleActionWrapper() const {
+ return GenModuleActionWrapper;
+ }
+
void addDependencyCollector(std::shared_ptr<DependencyCollector> Listener) {
DependencyCollectors.push_back(std::move(Listener));
}
diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 4f4d38c96114..f334f58ebd0a 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -245,6 +245,10 @@ public:
/// The set of GNU address of label extension "&&label".
llvm::SmallVector<AddrLabelExpr *, 4> AddrLabels;
+ /// An unresolved identifier lookup expression for an implicit call
+ /// to a SYCL kernel launch function in a dependent context.
+ Expr *SYCLKernelLaunchIdExpr = nullptr;
+
public:
/// Represents a simple identification of a weak object.
///
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 13a412914f5c..5917eb0ffbfe 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -1430,7 +1430,8 @@ public:
/// Diagnostics that are emitted only if we discover that the given function
/// must be codegen'ed. Because handling these correctly adds overhead to
- /// compilation, this is currently only enabled for CUDA compilations.
+ /// compilation, this is currently only used for offload languages like CUDA,
+ /// OpenMP, and SYCL.
SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
/// CurContext - This is the current declaration context of parsing.
@@ -13275,6 +13276,14 @@ public:
/// We are performing partial ordering for template template parameters.
PartialOrderingTTP,
+
+ /// We are performing name lookup for a function template or variable
+ /// template named 'sycl_kernel_launch'.
+ SYCLKernelLaunchLookup,
+
+ /// We are performing overload resolution for a call to a function
+ /// template or variable template named 'sycl_kernel_launch'.
+ SYCLKernelLaunchOverloadResolution,
} Kind;
/// Whether we're substituting into constraints.
@@ -13630,6 +13639,20 @@ public:
operator=(const SynthesizedFunctionScope &) = delete;
};
+ /// RAII object to ensure that a code synthesis context is popped on scope
+ /// exit. The context is pushed by the constructor and popped by the
+ /// destructor; the type is non-copyable so that each push stays paired
+ /// with exactly one pop.
+ class ScopedCodeSynthesisContext {
+ Sema &S;
+
+ public:
+ ScopedCodeSynthesisContext(Sema &S, const CodeSynthesisContext &Ctx)
+ : S(S) {
+ S.pushCodeSynthesisContext(Ctx);
+ }
+
+ // Copying would make two destructors pop the same context.
+ ScopedCodeSynthesisContext(const ScopedCodeSynthesisContext &) = delete;
+ ScopedCodeSynthesisContext &
+ operator=(const ScopedCodeSynthesisContext &) = delete;
+
+ ~ScopedCodeSynthesisContext() { S.popCodeSynthesisContext(); }
+ };
+
/// List of active code synthesis contexts.
///
/// This vector is treated as a stack. As synthesis of one entity requires
diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h
index 7ae556da2bec..4980aa44c301 100644
--- a/clang/include/clang/Sema/SemaSYCL.h
+++ b/clang/include/clang/Sema/SemaSYCL.h
@@ -64,9 +64,38 @@ public:
void handleKernelAttr(Decl *D, const ParsedAttr &AL);
void handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL);
+ /// Issues a deferred diagnostic if use of the declaration designated
+ /// by 'ND' is invalid in a device context.
+ void CheckDeviceUseOfDecl(NamedDecl *ND, SourceLocation Loc);
+
void CheckSYCLExternalFunctionDecl(FunctionDecl *FD);
void CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD);
- StmtResult BuildSYCLKernelCallStmt(FunctionDecl *FD, CompoundStmt *Body);
+
+ /// Builds an expression for the lookup of a 'sycl_kernel_launch' template
+ /// with 'KernelName' as an explicit template argument. Lookup is performed
+ /// as if from the first statement of the body of 'FD' and thus requires
+ /// searching the scopes that exist at parse time. This function therefore
+ /// requires the current semantic context to be the definition of 'FD'. In a
+ /// dependent context, the returned expression will be an UnresolvedLookupExpr
+ /// or an UnresolvedMemberExpr. In a non-dependent context, the returned
+ /// expression will be a DeclRefExpr or MemberExpr. If lookup fails, a null
+ /// error result is returned. The resulting expression is intended to be
+ /// passed as the 'LaunchIdExpr' argument in a call to either
+ /// BuildSYCLKernelCallStmt() or BuildUnresolvedSYCLKernelCallStmt() after
+ /// the function body has been parsed.
+ ExprResult BuildSYCLKernelLaunchIdExpr(FunctionDecl *FD, QualType KernelName);
+
+ /// Builds a SYCLKernelCallStmt to wrap 'Body' and to be used as the body of
+ /// 'FD'. 'LaunchIdExpr' specifies the lookup result returned by a previous
+ /// call to BuildSYCLKernelLaunchIdExpr().
+ StmtResult BuildSYCLKernelCallStmt(FunctionDecl *FD, CompoundStmt *Body,
+ Expr *LaunchIdExpr);
+
+ /// Builds an UnresolvedSYCLKernelCallStmt to wrap 'Body'. 'LaunchIdExpr'
+ /// specifies the lookup result returned by a previous call to
+ /// BuildSYCLKernelLaunchIdExpr().
+ StmtResult BuildUnresolvedSYCLKernelCallStmt(CompoundStmt *Body,
+ Expr *LaunchIdExpr);
};
} // namespace clang
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index d72f1f9db86b..752e7fd288aa 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1618,6 +1618,9 @@ enum StmtCode {
/// A SYCLKernelCallStmt record.
STMT_SYCLKERNELCALL,
+ /// An UnresolvedSYCLKernelCallStmt record.
+ STMT_UNRESOLVED_SYCL_KERNEL_CALL,
+
/// A GCC-style AsmStmt record.
STMT_GCCASM,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index f4ce4a7573aa..4d364fdcd550 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -600,7 +600,7 @@ void StmtPrinter::VisitCapturedStmt(CapturedStmt *Node) {
}
void StmtPrinter::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *Node) {
- PrintStmt(Node->getOutlinedFunctionDecl()->getBody());
+ PrintStmt(Node->getOriginalStmt());
}
void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) {
@@ -1447,6 +1447,11 @@ void StmtPrinter::VisitSYCLUniqueStableNameExpr(
OS << ")";
}
+void StmtPrinter::VisitUnresolvedSYCLKernelCallStmt(
+ UnresolvedSYCLKernelCallStmt *Node) {
+ PrintStmt(Node->getOriginalStmt());
+}
+
void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) {
OS << PredefinedExpr::getIdentKindName(Node->getIdentKind());
}
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 623905188b2d..dc7fd352a67b 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1410,6 +1410,11 @@ void StmtProfiler::VisitSYCLUniqueStableNameExpr(
VisitType(S->getTypeSourceInfo()->getType());
}
+void StmtProfiler::VisitUnresolvedSYCLKernelCallStmt(
+ const UnresolvedSYCLKernelCallStmt *S) {
+ VisitStmt(S);
+}
+
void StmtProfiler::VisitPredefinedExpr(const PredefinedExpr *S) {
VisitExpr(S);
ID.AddInteger(llvm::to_underlying(S->getIdentKind()));
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 493891e40db5..ea215985f303 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -40,36 +40,394 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
builder.getUInt64(scalingFactor, loc));
}
-static bool aarch64SVEIntrinsicsProvenSorted = false;
+//===----------------------------------------------------------------------===//
+// Intrinsics maps
+//
+// Maps that help automate code-generation.
+//
+// TODO(cir): Share this code with ARM.cpp
+//===----------------------------------------------------------------------===//
+enum {
+ AddRetType = (1 << 0),
+ Add1ArgType = (1 << 1),
+ Add2ArgTypes = (1 << 2),
+
+ VectorizeRetType = (1 << 3),
+ VectorizeArgTypes = (1 << 4),
+
+ InventFloatType = (1 << 5),
+ UnsignedAlts = (1 << 6),
+
+ Use64BitVectors = (1 << 7),
+ Use128BitVectors = (1 << 8),
+
+ Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+ VectorRet = AddRetType | VectorizeRetType,
+ VectorRetGetArgs01 =
+ AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+ FpCmpzModifiers =
+ AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
+};
namespace {
-struct AArch64BuiltinInfo {
+struct ARMVectorIntrinsicInfo {
+ const char *nameHint;
unsigned builtinID;
unsigned llvmIntrinsic;
+ unsigned altLLVMIntrinsic;
uint64_t typeModifier;
bool operator<(unsigned rhsBuiltinID) const {
return builtinID < rhsBuiltinID;
}
- bool operator<(const AArch64BuiltinInfo &te) const {
+ bool operator<(const ARMVectorIntrinsicInfo &te) const {
return builtinID < te.builtinID;
}
};
} // end anonymous namespace
-#define SVEMAP1(NameBase, llvmIntrinsic, TypeModifier) \
- {SVE::BI__builtin_sve_##NameBase, Intrinsic::llvmIntrinsic, TypeModifier}
+#define NEONMAP0(NameBase) \
+ {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0}
+
+#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier}
+
+#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
+ {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \
+ Intrinsic::AltLLVMIntrinsic, TypeModifier}
+
+static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
+ NEONMAP1(vabs_v, aarch64_neon_abs, 0),
+ NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
+ NEONMAP0(vadd_v),
+ NEONMAP0(vaddhn_v),
+ NEONMAP0(vaddq_p128),
+ NEONMAP0(vaddq_v),
+ NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
+ NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
+ NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
+ NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
+ NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
+ NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcage_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
+ NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
+ NEONMAP1(vcale_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
+ NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
+ NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
+ NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
+ NEONMAP1(vclz_v, ctlz, Add1ArgType),
+ NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcnt_v, ctpop, Add1ArgType),
+ NEONMAP1(vcntq_v, ctpop, Add1ArgType),
+ NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_s16),
+ NEONMAP0(vcvt_f16_u16),
+ NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
+ NEONMAP0(vcvt_f32_v),
+ NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
+ NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_f16_s16),
+ NEONMAP0(vcvtq_f16_u16),
+ NEONMAP0(vcvtq_f32_v),
+ NEONMAP0(vcvtq_high_bf16_f32),
+ NEONMAP0(vcvtq_low_bf16_f32),
+ NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
+ NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp,
+ 0),
+ NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp,
+ 0),
+ NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
+ NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
+ NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
+ NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
+ NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
+ NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vext_v),
+ NEONMAP0(vextq_v),
+ NEONMAP0(vfma_v),
+ NEONMAP0(vfmaq_v),
+ NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
+ NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
+ NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
+ NEONMAP0(vmovl_v),
+ NEONMAP0(vmovn_v),
+ NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
+ NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
+ NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
+ NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
+ NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
+ NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
+ NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
+ NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
+ NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
+ NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
+ NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
+ NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
+ NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
+ NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
+ NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
+ NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
+ NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
+ NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
+ NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
+ NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
+ NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
+ NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
+ NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
+ NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
+ NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
+ NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
+ NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
+ NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
+ NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
+ NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
+ NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
+ NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
+ NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
+ NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
+ NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
+ NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
+ NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
+ NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
+ NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
+ NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
+ NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
+ NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
+ NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
+ NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
+ NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
+ NEONMAP0(vshl_n_v),
+ NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshll_n_v),
+ NEONMAP0(vshlq_n_v),
+ NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshr_n_v),
+ NEONMAP0(vshrn_n_v),
+ NEONMAP0(vshrq_n_v),
+ NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
+ NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
+ NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
+ NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
+ NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
+ NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
+ NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
+ NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
+ NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
+ NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
+ NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
+ NEONMAP0(vsubhn_v),
+ NEONMAP0(vtst_v),
+ NEONMAP0(vtstq_v),
+ NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
+ NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
+ NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
+};
+
+#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier}
#define SVEMAP2(NameBase, TypeModifier) \
- {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
-static const AArch64BuiltinInfo aarch64SVEIntrinsicMap[] = {
+ {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
+static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};
-static const AArch64BuiltinInfo *
-findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap,
+static bool aarch64SIMDIntrinsicsProvenSorted = false;
+static bool aarch64SVEIntrinsicsProvenSorted = false;
+
+// Check if builtin `builtinID` is present in `intrinsicMap`. If yes, returns
+// the corresponding info struct.
+static const ARMVectorIntrinsicInfo *
+findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap,
unsigned builtinID, bool &mapProvenSorted) {
#ifndef NDEBUG
@@ -79,7 +437,8 @@ findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap,
}
#endif
- const AArch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
+ const ARMVectorIntrinsicInfo *info =
+ llvm::lower_bound(intrinsicMap, builtinID);
if (info != intrinsicMap.end() && info->builtinID == builtinID)
return info;
@@ -97,30 +456,383 @@ emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
bool scalarCmp = !isa<cir::VectorType>(src.getType());
if (!scalarCmp) {
- assert(cast<cir::VectorType>(retTy).getIsScalable() &&
+ assert(!cast<cir::VectorType>(retTy).getIsScalable() &&
"This is only intended for fixed-width vectors");
- // Vector retTypes are cast to i8 vectors. Recover original retType.
- cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare"));
+ // Vector types are cast to i8 vectors. Recover original type.
+ src = builder.createBitcast(src, retTy);
}
mlir::Value zero = builder.getNullValue(src.getType(), loc);
- mlir::Value cmp;
- if (cir::isFPOrVectorOfFPType(src.getType())) {
- cgf.cgm.errorNYI(loc, std::string("unimplemented FP compare"));
- } else {
- if (scalarCmp)
- // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare
- // result is sign- rather zero-extended when casting to the output
- // retType.
- cmp = builder.createCast(
- loc, cir::CastKind::bool_to_int,
- builder.createCompare(loc, cir::CmpOpKind::eq, src, zero),
- builder.getSIntNTy(1));
+
+ if (!scalarCmp)
+ return builder.createVecCompare(loc, kind, src, zero);
+
+ // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare
+ // result is sign- rather than zero-extended when casting to the output
+ // retType.
+ mlir::Value cmp = builder.createCast(
+ loc, cir::CastKind::bool_to_int,
+ builder.createCompare(loc, kind, src, zero), builder.getSIntNTy(1));
+
+ return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
+}
+
+// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
+static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
+ mlir::Location loc,
+ bool hasLegalHalfType = true,
+ bool v1Ty = false,
+ bool allowBFloatArgsAndRet = true) {
+ int isQuad = typeFlags.isQuad();
+ switch (typeFlags.getEltType()) {
+ case NeonTypeFlags::Int8:
+ case NeonTypeFlags::Poly8:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
+ : cgf->sInt8Ty,
+ v1Ty ? 1 : (8 << isQuad));
+ case NeonTypeFlags::MFloat8:
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8"));
+ [[fallthrough]];
+ case NeonTypeFlags::Int16:
+ case NeonTypeFlags::Poly16:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
+ : cgf->sInt16Ty,
+ v1Ty ? 1 : (4 << isQuad));
+ case NeonTypeFlags::BFloat16:
+ if (allowBFloatArgsAndRet)
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
+ else
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
+ [[fallthrough]];
+ case NeonTypeFlags::Float16:
+ if (hasLegalHalfType)
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
else
- cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
+ [[fallthrough]];
+ case NeonTypeFlags::Int32:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
+ : cgf->sInt32Ty,
+ v1Ty ? 1 : (2 << isQuad));
+ case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
+ : cgf->sInt64Ty,
+ v1Ty ? 1 : (1 << isQuad));
+ case NeonTypeFlags::Poly128:
+ // FIXME: i128 and f128 aren't fully supported in Clang and LLVM.
+ // There is a lot of i128 and f128 API missing,
+ // so we use v16i8 to represent poly128 and get pattern matched.
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
+ [[fallthrough]];
+ case NeonTypeFlags::Float32:
+ return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
+ v1Ty ? 1 : (2 << isQuad));
+ case NeonTypeFlags::Float64:
+ return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
+ v1Ty ? 1 : (1 << isQuad));
}
+ llvm_unreachable("Unknown vector element type!");
+}
- return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
+static mlir::Value emitCommonNeonBuiltinExpr(
+ CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
+ unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
+ const CallExpr *expr, llvm::SmallVectorImpl<mlir::Value> &ops) {
+
+ mlir::Location loc = cgf.getLoc(expr->getExprLoc());
+ clang::ASTContext &ctx = cgf.getContext();
+
+ // Extract the trailing immediate argument that encodes the type discriminator
+ // for this overloaded intrinsic.
+ // TODO: Move to the parent code that takes care of argument processing.
+ const clang::Expr *arg = expr->getArg(expr->getNumArgs() - 1);
+ std::optional<llvm::APSInt> neonTypeConst = arg->getIntegerConstantExpr(ctx);
+ if (!neonTypeConst)
+ return nullptr;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags neonType(neonTypeConst->getZExtValue());
+ const bool hasLegalHalfType = cgf.getTarget().hasFastHalfType();
+
+ // The value of allowBFloatArgsAndRet is true for AArch64, but it should
+ // come from ABI info.
+ const bool allowBFloatArgsAndRet = false;
+ // FIXME
+ // getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
+
+ cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType,
+ false, allowBFloatArgsAndRet);
+ mlir::Type ty = vTy;
+ if (!ty)
+ return nullptr;
+
+ switch (builtinID) {
+ case NEON::BI__builtin_neon_splat_lane_v:
+ case NEON::BI__builtin_neon_splat_laneq_v:
+ case NEON::BI__builtin_neon_splatq_lane_v:
+ case NEON::BI__builtin_neon_splatq_laneq_v:
+ case NEON::BI__builtin_neon_vpadd_v:
+ case NEON::BI__builtin_neon_vpaddq_v:
+ case NEON::BI__builtin_neon_vabs_v:
+ case NEON::BI__builtin_neon_vabsq_v:
+ case NEON::BI__builtin_neon_vadd_v:
+ case NEON::BI__builtin_neon_vaddq_v:
+ case NEON::BI__builtin_neon_vaddhn_v:
+ case NEON::BI__builtin_neon_vcale_v:
+ case NEON::BI__builtin_neon_vcaleq_v:
+ case NEON::BI__builtin_neon_vcalt_v:
+ case NEON::BI__builtin_neon_vcaltq_v:
+ case NEON::BI__builtin_neon_vcage_v:
+ case NEON::BI__builtin_neon_vcageq_v:
+ case NEON::BI__builtin_neon_vcagt_v:
+ case NEON::BI__builtin_neon_vcagtq_v:
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ ctx.BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+ case NEON::BI__builtin_neon_vceqz_v:
+ case NEON::BI__builtin_neon_vceqzq_v:
+ return emitAArch64CompareBuiltinExpr(cgf, cgf.getBuilder(), loc, ops[0],
+ vTy, cir::CmpOpKind::eq);
+ case NEON::BI__builtin_neon_vcgez_v:
+ case NEON::BI__builtin_neon_vcgezq_v:
+ case NEON::BI__builtin_neon_vclez_v:
+ case NEON::BI__builtin_neon_vclezq_v:
+ case NEON::BI__builtin_neon_vcgtz_v:
+ case NEON::BI__builtin_neon_vcgtzq_v:
+ case NEON::BI__builtin_neon_vcltz_v:
+ case NEON::BI__builtin_neon_vcltzq_v:
+ case NEON::BI__builtin_neon_vclz_v:
+ case NEON::BI__builtin_neon_vclzq_v:
+ case NEON::BI__builtin_neon_vcvt_f32_v:
+ case NEON::BI__builtin_neon_vcvtq_f32_v:
+ case NEON::BI__builtin_neon_vcvt_f16_s16:
+ case NEON::BI__builtin_neon_vcvt_f16_u16:
+ case NEON::BI__builtin_neon_vcvtq_f16_s16:
+ case NEON::BI__builtin_neon_vcvtq_f16_u16:
+ case NEON::BI__builtin_neon_vcvt_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvt_n_f16_u16:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_u16:
+ case NEON::BI__builtin_neon_vcvt_n_f32_v:
+ case NEON::BI__builtin_neon_vcvt_n_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f64_v:
+ case NEON::BI__builtin_neon_vcvt_n_s16_f16:
+ case NEON::BI__builtin_neon_vcvt_n_s32_v:
+ case NEON::BI__builtin_neon_vcvt_n_u16_f16:
+ case NEON::BI__builtin_neon_vcvt_n_u32_v:
+ case NEON::BI__builtin_neon_vcvt_n_s64_v:
+ case NEON::BI__builtin_neon_vcvt_n_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
+ case NEON::BI__builtin_neon_vcvtq_n_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
+ case NEON::BI__builtin_neon_vcvtq_n_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s32_v:
+ case NEON::BI__builtin_neon_vcvt_u32_v:
+ case NEON::BI__builtin_neon_vcvt_s64_v:
+ case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s16_f16:
+ case NEON::BI__builtin_neon_vcvt_u16_f16:
+ case NEON::BI__builtin_neon_vcvtq_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtq_u16_f16:
+ case NEON::BI__builtin_neon_vcvta_s16_f16:
+ case NEON::BI__builtin_neon_vcvta_s32_v:
+ case NEON::BI__builtin_neon_vcvta_s64_v:
+ case NEON::BI__builtin_neon_vcvta_u16_f16:
+ case NEON::BI__builtin_neon_vcvta_u32_v:
+ case NEON::BI__builtin_neon_vcvta_u64_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtaq_s32_v:
+ case NEON::BI__builtin_neon_vcvtaq_s64_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtaq_u32_v:
+ case NEON::BI__builtin_neon_vcvtaq_u64_v:
+ case NEON::BI__builtin_neon_vcvtn_s16_f16:
+ case NEON::BI__builtin_neon_vcvtn_s32_v:
+ case NEON::BI__builtin_neon_vcvtn_s64_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_f16:
+ case NEON::BI__builtin_neon_vcvtn_u32_v:
+ case NEON::BI__builtin_neon_vcvtn_u64_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtnq_s32_v:
+ case NEON::BI__builtin_neon_vcvtnq_s64_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtnq_u32_v:
+ case NEON::BI__builtin_neon_vcvtnq_u64_v:
+ case NEON::BI__builtin_neon_vcvtp_s16_f16:
+ case NEON::BI__builtin_neon_vcvtp_s32_v:
+ case NEON::BI__builtin_neon_vcvtp_s64_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_f16:
+ case NEON::BI__builtin_neon_vcvtp_u32_v:
+ case NEON::BI__builtin_neon_vcvtp_u64_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtpq_s32_v:
+ case NEON::BI__builtin_neon_vcvtpq_s64_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtpq_u32_v:
+ case NEON::BI__builtin_neon_vcvtpq_u64_v:
+ case NEON::BI__builtin_neon_vcvtm_s16_f16:
+ case NEON::BI__builtin_neon_vcvtm_s32_v:
+ case NEON::BI__builtin_neon_vcvtm_s64_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_f16:
+ case NEON::BI__builtin_neon_vcvtm_u32_v:
+ case NEON::BI__builtin_neon_vcvtm_u64_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtmq_s32_v:
+ case NEON::BI__builtin_neon_vcvtmq_s64_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtmq_u32_v:
+ case NEON::BI__builtin_neon_vcvtmq_u64_v:
+ case NEON::BI__builtin_neon_vcvtx_f32_v:
+ case NEON::BI__builtin_neon_vext_v:
+ case NEON::BI__builtin_neon_vextq_v:
+ case NEON::BI__builtin_neon_vfma_v:
+ case NEON::BI__builtin_neon_vfmaq_v:
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v:
+ case NEON::BI__builtin_neon_vld2_v:
+ case NEON::BI__builtin_neon_vld2q_v:
+ case NEON::BI__builtin_neon_vld3_v:
+ case NEON::BI__builtin_neon_vld3q_v:
+ case NEON::BI__builtin_neon_vld4_v:
+ case NEON::BI__builtin_neon_vld4q_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v:
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v:
+ case NEON::BI__builtin_neon_vld2_lane_v:
+ case NEON::BI__builtin_neon_vld2q_lane_v:
+ case NEON::BI__builtin_neon_vld3_lane_v:
+ case NEON::BI__builtin_neon_vld3q_lane_v:
+ case NEON::BI__builtin_neon_vld4_lane_v:
+ case NEON::BI__builtin_neon_vld4q_lane_v:
+ case NEON::BI__builtin_neon_vmovl_v:
+ case NEON::BI__builtin_neon_vmovn_v:
+ case NEON::BI__builtin_neon_vmull_v:
+ case NEON::BI__builtin_neon_vpadal_v:
+ case NEON::BI__builtin_neon_vpadalq_v:
+ case NEON::BI__builtin_neon_vpaddl_v:
+ case NEON::BI__builtin_neon_vpaddlq_v:
+ case NEON::BI__builtin_neon_vqdmlal_v:
+ case NEON::BI__builtin_neon_vqdmlsl_v:
+ case NEON::BI__builtin_neon_vqdmulhq_lane_v:
+ case NEON::BI__builtin_neon_vqdmulh_lane_v:
+ case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
+ case NEON::BI__builtin_neon_vqrdmulh_lane_v:
+ case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
+ case NEON::BI__builtin_neon_vqdmulh_laneq_v:
+ case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
+ case NEON::BI__builtin_neon_vqrdmulh_laneq_v:
+ case NEON::BI__builtin_neon_vqshl_n_v:
+ case NEON::BI__builtin_neon_vqshlq_n_v:
+ case NEON::BI__builtin_neon_vqshlu_n_v:
+ case NEON::BI__builtin_neon_vqshluq_n_v:
+ case NEON::BI__builtin_neon_vrecpe_v:
+ case NEON::BI__builtin_neon_vrecpeq_v:
+ case NEON::BI__builtin_neon_vrsqrte_v:
+ case NEON::BI__builtin_neon_vrsqrteq_v:
+ case NEON::BI__builtin_neon_vrndi_v:
+ case NEON::BI__builtin_neon_vrndiq_v:
+ case NEON::BI__builtin_neon_vrshr_n_v:
+ case NEON::BI__builtin_neon_vrshrq_n_v:
+ case NEON::BI__builtin_neon_vsha512hq_u64:
+ case NEON::BI__builtin_neon_vsha512h2q_u64:
+ case NEON::BI__builtin_neon_vsha512su0q_u64:
+ case NEON::BI__builtin_neon_vsha512su1q_u64:
+ case NEON::BI__builtin_neon_vshl_n_v:
+ case NEON::BI__builtin_neon_vshlq_n_v:
+ case NEON::BI__builtin_neon_vshll_n_v:
+ case NEON::BI__builtin_neon_vshrn_n_v:
+ case NEON::BI__builtin_neon_vshr_n_v:
+ case NEON::BI__builtin_neon_vshrq_n_v:
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ case NEON::BI__builtin_neon_vst2_v:
+ case NEON::BI__builtin_neon_vst2q_v:
+ case NEON::BI__builtin_neon_vst3_v:
+ case NEON::BI__builtin_neon_vst3q_v:
+ case NEON::BI__builtin_neon_vst4_v:
+ case NEON::BI__builtin_neon_vst4q_v:
+ case NEON::BI__builtin_neon_vst2_lane_v:
+ case NEON::BI__builtin_neon_vst2q_lane_v:
+ case NEON::BI__builtin_neon_vst3_lane_v:
+ case NEON::BI__builtin_neon_vst3q_lane_v:
+ case NEON::BI__builtin_neon_vst4_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v:
+ case NEON::BI__builtin_neon_vsm3partw1q_u32:
+ case NEON::BI__builtin_neon_vsm3partw2q_u32:
+ case NEON::BI__builtin_neon_vsm3ss1q_u32:
+ case NEON::BI__builtin_neon_vsm4ekeyq_u32:
+ case NEON::BI__builtin_neon_vsm4eq_u32:
+ case NEON::BI__builtin_neon_vsm3tt1aq_u32:
+ case NEON::BI__builtin_neon_vsm3tt1bq_u32:
+ case NEON::BI__builtin_neon_vsm3tt2aq_u32:
+ case NEON::BI__builtin_neon_vsm3tt2bq_u32:
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v:
+ case NEON::BI__builtin_neon_vsubhn_v:
+ case NEON::BI__builtin_neon_vtrn_v:
+ case NEON::BI__builtin_neon_vtrnq_v:
+ case NEON::BI__builtin_neon_vtst_v:
+ case NEON::BI__builtin_neon_vtstq_v:
+ case NEON::BI__builtin_neon_vuzp_v:
+ case NEON::BI__builtin_neon_vuzpq_v:
+ case NEON::BI__builtin_neon_vxarq_u64:
+ case NEON::BI__builtin_neon_vzip_v:
+ case NEON::BI__builtin_neon_vzipq_v:
+ case NEON::BI__builtin_neon_vdot_s32:
+ case NEON::BI__builtin_neon_vdot_u32:
+ case NEON::BI__builtin_neon_vdotq_s32:
+ case NEON::BI__builtin_neon_vdotq_u32:
+ case NEON::BI__builtin_neon_vfmlal_low_f16:
+ case NEON::BI__builtin_neon_vfmlalq_low_f16:
+ case NEON::BI__builtin_neon_vfmlsl_low_f16:
+ case NEON::BI__builtin_neon_vfmlslq_low_f16:
+ case NEON::BI__builtin_neon_vfmlal_high_f16:
+ case NEON::BI__builtin_neon_vfmlalq_high_f16:
+ case NEON::BI__builtin_neon_vfmlsl_high_f16:
+ case NEON::BI__builtin_neon_vfmlslq_high_f16:
+ case NEON::BI__builtin_neon_vmmlaq_s32:
+ case NEON::BI__builtin_neon_vmmlaq_u32:
+ case NEON::BI__builtin_neon_vusmmlaq_s32:
+ case NEON::BI__builtin_neon_vusdot_s32:
+ case NEON::BI__builtin_neon_vusdotq_s32:
+ case NEON::BI__builtin_neon_vbfdot_f32:
+ case NEON::BI__builtin_neon_vbfdotq_f32:
+ case NEON::BI__builtin_neon___a32_vcvt_bf16_f32:
+ default:
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ ctx.BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ ctx.BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+ }
}
// Emit an intrinsic where all operands are of the same type as the result.
@@ -243,7 +955,7 @@ static unsigned getSVEMinEltCount(clang::SVETypeFlags::EltType sveType) {
}
}
-// TODO: Share with OGCG
+// TODO(cir): Share with OGCG
constexpr unsigned sveBitsPerBlock = 128;
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm,
@@ -261,7 +973,7 @@ static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm,
/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
/// should be kept consistent with the logic in Sema.
/// TODO: Make this return false for SISD builtins.
-/// TODO: Share this with ARM.cpp
+/// TODO(cir): Share this with ARM.cpp
static bool hasExtraNeonArgument(unsigned builtinID) {
// Required by the headers included below, but not in this particular
// function.
@@ -290,64 +1002,6 @@ static bool hasExtraNeonArgument(unsigned builtinID) {
return mask != 0;
}
-// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
-static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
- mlir::Location loc,
- bool hasLegalHalfType = true,
- bool v1Ty = false,
- bool allowBFloatArgsAndRet = true) {
- int isQuad = typeFlags.isQuad();
- switch (typeFlags.getEltType()) {
- case NeonTypeFlags::Int8:
- case NeonTypeFlags::Poly8:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
- : cgf->sInt8Ty,
- v1Ty ? 1 : (8 << isQuad));
- case NeonTypeFlags::MFloat8:
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8"));
- [[fallthrough]];
- case NeonTypeFlags::Int16:
- case NeonTypeFlags::Poly16:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
- : cgf->sInt16Ty,
- v1Ty ? 1 : (4 << isQuad));
- case NeonTypeFlags::BFloat16:
- if (allowBFloatArgsAndRet)
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
- else
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
- [[fallthrough]];
- case NeonTypeFlags::Float16:
- if (hasLegalHalfType)
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
- else
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
- [[fallthrough]];
- case NeonTypeFlags::Int32:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
- : cgf->sInt32Ty,
- v1Ty ? 1 : (2 << isQuad));
- case NeonTypeFlags::Int64:
- case NeonTypeFlags::Poly64:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
- : cgf->sInt64Ty,
- v1Ty ? 1 : (1 << isQuad));
- case NeonTypeFlags::Poly128:
- // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
- // There is a lot of i128 and f128 API missing.
- // so we use v16i8 to represent poly128 and get pattern matched.
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
- [[fallthrough]];
- case NeonTypeFlags::Float32:
- return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
- v1Ty ? 1 : (2 << isQuad));
- case NeonTypeFlags::Float64:
- return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
- v1Ty ? 1 : (1 << isQuad));
- }
- llvm_unreachable("Unknown vector element type!");
-}
-
// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
template <typename Operation>
static mlir::Value
@@ -1584,6 +2238,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
mlir::Location loc = getLoc(expr->getExprLoc());
+ // Not all intrinsics handled by the common case work for AArch64 yet, so only
+ // defer to common code if it's been added to our special map.
+ const ARMVectorIntrinsicInfo *builtin;
+ builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
+ aarch64SIMDIntrinsicsProvenSorted);
+
+ if (builtin)
+ return emitCommonNeonBuiltinExpr(
+ *this, builtin->builtinID, builtin->llvmIntrinsic,
+ builtin->altLLVMIntrinsic, builtin->nameHint, builtin->typeModifier,
+ expr, ops);
+
// Handle non-overloaded intrinsics first.
switch (builtinID) {
default:
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
index 157dc3fdd56f..61ccd85cd634 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
@@ -1021,6 +1021,16 @@ CIRGenTypes::arrangeFunctionDeclaration(const FunctionDecl *fd) {
return arrangeFreeFunctionType(funcTy.castAs<FunctionProtoType>());
}
+RValue CallArg::getRValue(CIRGenFunction &cgf, mlir::Location loc) const {
+ if (!hasLV)
+ return rv;
+ LValue copy = cgf.makeAddrLValue(cgf.createMemTemp(ty, loc), ty);
+ cgf.emitAggregateCopy(copy, lv, ty, AggValueSlot::DoesNotOverlap,
+ lv.isVolatile());
+ isUsed = true;
+ return RValue::getAggregate(copy.getAddress());
+}
+
static cir::CIRCallOpInterface
emitCallLikeOp(CIRGenFunction &cgf, mlir::Location callLoc,
cir::FuncType indirectFuncTy, mlir::Value indirectFuncVal,
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h
index 347bd4a7c826..b30b4969ca45 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCall.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCall.h
@@ -202,7 +202,7 @@ private:
/// A data-flow flag to make sure getRValue and/or copyInto are not
/// called twice for duplicated IR emission.
- [[maybe_unused]] mutable bool isUsed;
+ mutable bool isUsed;
public:
clang::QualType ty;
@@ -215,6 +215,10 @@ public:
bool hasLValue() const { return hasLV; }
+ /// \returns an independent RValue. If the CallArg contains an LValue,
+ /// a temporary copy is returned.
+ RValue getRValue(CIRGenFunction &cgf, mlir::Location loc) const;
+
LValue getKnownLValue() const {
assert(hasLV && !isUsed);
return lv;
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index bdb2947200f2..cbed8452810c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -95,7 +95,6 @@ void *EHScopeStack::pushCleanup(CleanupKind kind, size_t size) {
bool isLifetimeMarker = kind & LifetimeMarker;
bool skipCleanupScope = false;
- assert(!cir::MissingFeatures::innermostEHScope());
cir::CleanupKind cleanupKind = cir::CleanupKind::All;
if (isEHCleanup && cgf->getLangOpts().Exceptions) {
cleanupKind =
@@ -193,6 +192,25 @@ bool EHScopeStack::requiresCatchOrCleanup() const {
return false;
}
+/// Deactivate a cleanup that was created in an active state.
+void CIRGenFunction::deactivateCleanupBlock(EHScopeStack::stable_iterator c,
+ mlir::Operation *dominatingIP) {
+ assert(c != ehStack.stable_end() && "deactivating bottom of stack?");
+ EHCleanupScope &scope = cast<EHCleanupScope>(*ehStack.find(c));
+ assert(scope.isActive() && "double deactivation");
+
+ // If it's the top of the stack, just pop it, but do so only if it belongs
+ // to the current RunCleanupsScope.
+ if (c == ehStack.stable_begin() &&
+ currentCleanupStackDepth.strictlyEncloses(c)) {
+ popCleanupBlock();
+ return;
+ }
+
+ // Otherwise, follow the general case.
+ cgm.errorNYI("deactivateCleanupBlock: setupCleanupBlockActivation");
+}
+
static void emitCleanup(CIRGenFunction &cgf, cir::CleanupScopeOp cleanupScope,
EHScopeStack::Cleanup *cleanup,
EHScopeStack::Cleanup::Flags flags) {
@@ -245,10 +263,11 @@ void CIRGenFunction::popCleanupBlock() {
bool hasFallthrough = fallthroughSource != nullptr && isActive;
bool requiresNormalCleanup = scope.isNormalCleanup() && hasFallthrough;
+ bool requiresEHCleanup = scope.isEHCleanup() && hasFallthrough;
// If we don't need the cleanup at all, we're done.
assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup());
- if (!requiresNormalCleanup) {
+ if (!requiresNormalCleanup && !requiresEHCleanup) {
ehStack.popCleanup();
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 97f496c89ab0..35f74e7120b0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/ExprObjC.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/TrailingObjects.h"
using namespace clang;
using namespace clang::CIRGen;
@@ -647,6 +648,209 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
return size;
}
+/// Emit a call to an operator new or operator delete function, as implicitly
+/// created by new-expressions and delete-expressions.
+static RValue emitNewDeleteCall(CIRGenFunction &cgf,
+ const FunctionDecl *calleeDecl,
+ const FunctionProtoType *calleeType,
+ const CallArgList &args) {
+ cir::CIRCallOpInterface callOrTryCall;
+ cir::FuncOp calleePtr = cgf.cgm.getAddrOfFunction(calleeDecl);
+ CIRGenCallee callee =
+ CIRGenCallee::forDirect(calleePtr, GlobalDecl(calleeDecl));
+ RValue rv =
+ cgf.emitCall(cgf.cgm.getTypes().arrangeFreeFunctionCall(args, calleeType),
+ callee, ReturnValueSlot(), args, &callOrTryCall);
+
+ /// C++1y [expr.new]p10:
+ /// [In a new-expression,] an implementation is allowed to omit a call
+ /// to a replaceable global allocation function.
+ ///
+ /// We model such elidable calls with the 'builtin' attribute.
+ assert(!cir::MissingFeatures::attributeBuiltin());
+ return rv;
+}
+
+RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type,
+ const CallExpr *callExpr,
+ OverloadedOperatorKind op) {
+ CallArgList args;
+ emitCallArgs(args, type, callExpr->arguments());
+ // Find the allocation or deallocation function that we're calling.
+ ASTContext &astContext = getContext();
+ assert(op == OO_New || op == OO_Delete);
+ DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(op);
+
+ clang::DeclContextLookupResult lookupResult =
+ astContext.getTranslationUnitDecl()->lookup(name);
+ for (const NamedDecl *decl : lookupResult) {
+ if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) {
+ if (astContext.hasSameType(funcDecl->getType().getTypePtr(), type)) {
+ if (sanOpts.has(SanitizerKind::AllocToken)) {
+ // TODO: Set !alloc_token metadata.
+ assert(!cir::MissingFeatures::allocToken());
+ cgm.errorNYI("Alloc token sanitizer not yet supported!");
+ }
+
+ // Emit the call to operator new/delete.
+ return emitNewDeleteCall(*this, funcDecl, type, args);
+ }
+ }
+ }
+
+ llvm_unreachable("predeclared global operator new/delete is missing");
+}
+
+namespace {
+template <typename Traits> struct PlacementArg {
+ typename Traits::RValueTy argValue;
+ QualType argType;
+};
+
+/// A cleanup to call the given 'operator delete' function upon abnormal
+/// exit from a new expression. Templated on a traits type that deals with
+/// ensuring that the arguments dominate the cleanup if necessary.
+template <typename Traits>
+class CallDeleteDuringNew final
+ : public EHScopeStack::Cleanup,
+ private llvm::TrailingObjects<CallDeleteDuringNew<Traits>,
+ PlacementArg<Traits>> {
+ using TrailingObj =
+ llvm::TrailingObjects<CallDeleteDuringNew<Traits>, PlacementArg<Traits>>;
+ friend TrailingObj;
+ using TrailingObj::getTrailingObjects;
+
+ /// Type used to hold mlir::Values.
+ typedef typename Traits::ValueTy ValueTy;
+ /// Type used to hold RValues.
+ typedef typename Traits::RValueTy RValueTy;
+
+ unsigned numPlacementArgs : 30;
+ LLVM_PREFERRED_TYPE(AlignedAllocationMode)
+ unsigned passAlignmentToPlacementDelete : 1;
+ const FunctionDecl *operatorDelete;
+ ValueTy ptr;
+ ValueTy allocSize;
+ CharUnits allocAlign;
+
+ PlacementArg<Traits> *getPlacementArgs() { return getTrailingObjects(); }
+
+ void setPlacementArg(unsigned i, RValueTy argValue, QualType argType) {
+ assert(i < numPlacementArgs && "index out of range");
+ getPlacementArgs()[i] = {argValue, argType};
+ }
+
+public:
+ static size_t getExtraSize(size_t numPlacementArgs) {
+ return TrailingObj::template additionalSizeToAlloc<PlacementArg<Traits>>(
+ numPlacementArgs);
+ }
+
+ CallDeleteDuringNew(size_t numPlacementArgs,
+ const FunctionDecl *operatorDelete, ValueTy ptr,
+ ValueTy allocSize,
+ const ImplicitAllocationParameters &iap,
+ CharUnits allocAlign, const CallArgList *newArgs,
+ unsigned numNonPlacementArgs, CIRGenFunction *cgf,
+ mlir::Location loc)
+ : numPlacementArgs(numPlacementArgs),
+ passAlignmentToPlacementDelete(isAlignedAllocation(iap.PassAlignment)),
+ operatorDelete(operatorDelete), ptr(ptr), allocSize(allocSize),
+ allocAlign(allocAlign) {
+ for (unsigned i = 0, n = numPlacementArgs; i != n; ++i) {
+ const CallArg &arg = (*newArgs)[i + numNonPlacementArgs];
+ setPlacementArg(i, arg.getRValue(*cgf, loc), arg.ty);
+ }
+ }
+
+ void emit(CIRGenFunction &cgf, Flags flags) override {
+ const auto *fpt = operatorDelete->getType()->castAs<FunctionProtoType>();
+ CallArgList deleteArgs;
+
+ unsigned firstNonTypeArg = 0;
+ TypeAwareAllocationMode typeAwareDeallocation = TypeAwareAllocationMode::No;
+ assert(!cir::MissingFeatures::typeAwareAllocation());
+
+ // The first argument after type-identity parameter (if any) is always
+ // a void* (or C* for a destroying operator delete for class type C).
+ deleteArgs.add(Traits::get(cgf, ptr), fpt->getParamType(firstNonTypeArg));
+
+ // Figure out what other parameters we should be implicitly passing.
+ UsualDeleteParams params;
+ if (numPlacementArgs) {
+ // A placement deallocation function is implicitly passed an alignment
+ // if the placement allocation function was, but is never passed a size.
+ params.Alignment =
+ alignedAllocationModeFromBool(passAlignmentToPlacementDelete);
+ params.TypeAwareDelete = typeAwareDeallocation;
+ params.Size = isTypeAwareAllocation(params.TypeAwareDelete);
+ } else {
+ // For a non-placement new-expression, 'operator delete' can take a
+ // size and/or an alignment if it has the right parameters.
+ params = operatorDelete->getUsualDeleteParams();
+ }
+
+ assert(!params.DestroyingDelete &&
+ "should not call destroying delete in a new-expression");
+
+ // The second argument can be a std::size_t (for non-placement delete).
+ if (params.Size)
+ deleteArgs.add(Traits::get(cgf, allocSize),
+ cgf.getContext().getSizeType());
+
+ // The next (second or third) argument can be a std::align_val_t, which
+ // is an enum whose underlying type is std::size_t.
+ // FIXME: Use the right type as the parameter type. Note that in a call
+ // to operator delete(size_t, ...), we may not have it available.
+ if (isAlignedAllocation(params.Alignment))
+ cgf.cgm.errorNYI("CallDeleteDuringNew: aligned allocation");
+
+ // Pass the rest of the arguments, which must match exactly.
+ for (unsigned i = 0; i != numPlacementArgs; ++i) {
+ auto arg = getPlacementArgs()[i];
+ deleteArgs.add(Traits::get(cgf, arg.argValue), arg.argType);
+ }
+
+ // Call 'operator delete'.
+ emitNewDeleteCall(cgf, operatorDelete, fpt, deleteArgs);
+ }
+};
+} // namespace
+
+/// Enter a cleanup to call 'operator delete' if the initializer in a
+/// new-expression throws.
+static void enterNewDeleteCleanup(CIRGenFunction &cgf, const CXXNewExpr *e,
+ Address newPtr, mlir::Value allocSize,
+ CharUnits allocAlign,
+ const CallArgList &newArgs) {
+ unsigned numNonPlacementArgs = e->getNumImplicitArgs();
+
+ // If we're not inside a conditional branch, then the cleanup will
+ // dominate and we can do the easier (and more efficient) thing.
+ if (!cgf.isInConditionalBranch()) {
+ struct DirectCleanupTraits {
+ typedef mlir::Value ValueTy;
+ typedef RValue RValueTy;
+ static RValue get(CIRGenFunction &, ValueTy v) { return RValue::get(v); }
+ static RValue get(CIRGenFunction &, RValueTy v) { return v; }
+ };
+
+ typedef CallDeleteDuringNew<DirectCleanupTraits> DirectCleanup;
+
+ assert(!cir::MissingFeatures::typeAwareAllocation());
+ cgf.ehStack.pushCleanupWithExtra<DirectCleanup>(
+ EHCleanup, e->getNumPlacementArgs(), e->getOperatorDelete(),
+ newPtr.getPointer(), allocSize, e->implicitAllocationParameters(),
+ allocAlign, &newArgs, numNonPlacementArgs, &cgf,
+ cgf.getLoc(e->getSourceRange()));
+
+ return;
+ }
+
+ cgf.cgm.errorNYI(e->getSourceRange(),
+ "enterNewDeleteCleanup: conditional branch");
+}
+
static void storeAnyExprIntoOneUnit(CIRGenFunction &cgf, const Expr *init,
QualType allocType, Address newPtr,
AggValueSlot::Overlap_t mayOverlap) {
@@ -912,59 +1116,6 @@ RValue CIRGenFunction::emitCXXPseudoDestructorExpr(
return RValue::get(nullptr);
}
-/// Emit a call to an operator new or operator delete function, as implicitly
-/// created by new-expressions and delete-expressions.
-static RValue emitNewDeleteCall(CIRGenFunction &cgf,
- const FunctionDecl *calleeDecl,
- const FunctionProtoType *calleeType,
- const CallArgList &args) {
- cir::CIRCallOpInterface callOrTryCall;
- cir::FuncOp calleePtr = cgf.cgm.getAddrOfFunction(calleeDecl);
- CIRGenCallee callee =
- CIRGenCallee::forDirect(calleePtr, GlobalDecl(calleeDecl));
- RValue rv =
- cgf.emitCall(cgf.cgm.getTypes().arrangeFreeFunctionCall(args, calleeType),
- callee, ReturnValueSlot(), args, &callOrTryCall);
-
- /// C++1y [expr.new]p10:
- /// [In a new-expression,] an implementation is allowed to omit a call
- /// to a replaceable global allocation function.
- ///
- /// We model such elidable calls with the 'builtin' attribute.
- assert(!cir::MissingFeatures::attributeBuiltin());
- return rv;
-}
-
-RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type,
- const CallExpr *callExpr,
- OverloadedOperatorKind op) {
- CallArgList args;
- emitCallArgs(args, type, callExpr->arguments());
- // Find the allocation or deallocation function that we're calling.
- ASTContext &astContext = getContext();
- assert(op == OO_New || op == OO_Delete);
- DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(op);
-
- clang::DeclContextLookupResult lookupResult =
- astContext.getTranslationUnitDecl()->lookup(name);
- for (const auto *decl : lookupResult) {
- if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) {
- if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) {
- if (sanOpts.has(SanitizerKind::AllocToken)) {
- // TODO: Set !alloc_token metadata.
- assert(!cir::MissingFeatures::allocToken());
- cgm.errorNYI("Alloc token sanitizer not yet supported!");
- }
-
- // Emit the call to operator new/delete.
- return emitNewDeleteCall(*this, funcDecl, type, args);
- }
- }
- }
-
- llvm_unreachable("predeclared global operator new/delete is missing");
-}
-
namespace {
/// Calls the given 'operator delete' on a single object.
struct CallObjectDelete final : EHScopeStack::Cleanup {
@@ -1190,10 +1341,24 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: null check");
// If there's an operator delete, enter a cleanup to call it if an
- // exception is thrown.
- if (e->getOperatorDelete() &&
- !e->getOperatorDelete()->isReservedGlobalPlacementOperator())
- cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: operator delete");
+ // exception is thrown. If we do this, we'll be creating the result pointer
+ // inside a cleanup scope, either with a bitcast or an offset based on the
+ // array cookie size. However, we need to return that pointer from outside
+ // the cleanup scope, so we need to store it in a temporary variable.
+ bool useNewDeleteCleanup =
+ e->getOperatorDelete() &&
+ !e->getOperatorDelete()->isReservedGlobalPlacementOperator();
+ EHScopeStack::stable_iterator operatorDeleteCleanup;
+ mlir::Operation *cleanupDominator = nullptr;
+ if (useNewDeleteCleanup) {
+ assert(!cir::MissingFeatures::typeAwareAllocation());
+ enterNewDeleteCleanup(*this, e, allocation, allocSize, allocAlign,
+ allocatorArgs);
+ operatorDeleteCleanup = ehStack.stable_begin();
+ cleanupDominator =
+ cir::UnreachableOp::create(builder, getLoc(e->getSourceRange()))
+ .getOperation();
+ }
if (allocSize != allocSizeWithoutCookie) {
assert(e->isArray());
@@ -1212,6 +1377,16 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
Address result = builder.createElementBitCast(getLoc(e->getSourceRange()),
allocation, elementTy);
+ // If we're inside a new delete cleanup, store the result pointer.
+ Address resultPtr = Address::invalid();
+ if (useNewDeleteCleanup) {
+ resultPtr =
+ createTempAlloca(builder.getPointerTo(elementTy), result.getAlignment(),
+ getLoc(e->getSourceRange()), "__new_result");
+ builder.createStore(getLoc(e->getSourceRange()), result.getPointer(),
+ resultPtr);
+ }
+
// Passing pointer through launder.invariant.group to avoid propagation of
// vptrs information which may be included in previous type.
// To not break LTO with different optimizations levels, we do it regardless
@@ -1224,6 +1399,21 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
emitNewInitializer(*this, e, allocType, elementTy, result, numElements,
allocSizeWithoutCookie);
+
+ // Deactivate the 'operator delete' cleanup if we finished
+ // initialization.
+ if (useNewDeleteCleanup) {
+ assert(operatorDeleteCleanup.isValid());
+ assert(resultPtr.isValid());
+ deactivateCleanupBlock(operatorDeleteCleanup, cleanupDominator);
+ cleanupDominator->erase();
+ cir::LoadOp loadResult =
+ builder.createLoad(getLoc(e->getSourceRange()), resultPtr);
+ result = result.withPointer(loadResult.getResult());
+ }
+
+ assert(!cir::MissingFeatures::exprNewNullCheck());
+
return result.getPointer();
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 539d7839d1df..0e82958ef6f3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -970,6 +970,16 @@ public:
ArrayRef<mlir::Value *> valuesToReload = {});
void popCleanupBlock();
+ /// Deactivates the given cleanup block. The block cannot be reactivated. Pops
+ /// it if it's the top of the stack.
+ ///
+ /// \param dominatingIP - An instruction which is known to
+ /// dominate the current IP (if set) and which lies along
+ /// all paths of execution between the current IP and
+ /// the point at which the cleanup comes into scope.
+ void deactivateCleanupBlock(EHScopeStack::stable_iterator cleanup,
+ mlir::Operation *dominatingIP);
+
/// Push a cleanup to be run at the end of the current full-expression. Safe
/// against the possibility that we're currently inside a
/// conditionally-evaluated expression.
diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h
index 9d614c858dbe..09b78820a258 100644
--- a/clang/lib/CIR/CodeGen/EHScopeStack.h
+++ b/clang/lib/CIR/CodeGen/EHScopeStack.h
@@ -187,6 +187,25 @@ public:
[[maybe_unused]] Cleanup *obj = new (buffer) T(a...);
}
+ /// Push a cleanup with non-constant storage requirements on the
+ /// stack. The cleanup type must provide an additional static method:
+ /// static size_t getExtraSize(size_t);
+ /// The argument to this method will be the value N, which will also
+ /// be passed as the first argument to the constructor.
+ ///
+ /// The data stored in the extra storage must obey the same
+ /// restrictions as normal cleanup member data.
+ ///
+ /// The pointer returned from this method is valid until the cleanup
+ /// stack is modified.
+ template <class T, class... As>
+ T *pushCleanupWithExtra(CleanupKind kind, size_t n, As... a) {
+ static_assert(alignof(T) <= ScopeStackAlignment,
+ "Cleanup's alignment is too large.");
+ void *buffer = pushCleanup(kind, sizeof(T) + T::getExtraSize(n));
+ return new (buffer) T(n, a...);
+ }
+
void setCGF(CIRGenFunction *inCGF) { cgf = inCGF; }
/// Pops a cleanup scope off the stack. This is private to CIRGenCleanup.cpp.
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 47b7e2b18d94..d6687b8e295e 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -662,6 +662,54 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return Builder.CreateIntrinsic(
RetTy, CGM.getHLSLRuntime().getSampleCmpLevelZeroIntrinsic(), Args);
}
+ case Builtin::BI__builtin_hlsl_resource_gather: {
+ Value *HandleOp = EmitScalarExpr(E->getArg(0));
+ Value *SamplerOp = EmitScalarExpr(E->getArg(1));
+ Value *CoordOp = EmitScalarExpr(E->getArg(2));
+ Value *ComponentOp = EmitScalarExpr(E->getArg(3));
+ if (ComponentOp->getType() != Builder.getInt32Ty())
+ ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
+ /*isSigned=*/false);
+
+ SmallVector<Value *, 5> Args;
+ Args.push_back(HandleOp);
+ Args.push_back(SamplerOp);
+ Args.push_back(CoordOp);
+ Args.push_back(ComponentOp);
+ Args.push_back(emitHlslOffset(*this, E, 4));
+
+ llvm::Type *RetTy = ConvertType(E->getType());
+ return Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getGatherIntrinsic(), Args);
+ }
+ case Builtin::BI__builtin_hlsl_resource_gather_cmp: {
+ Value *HandleOp = EmitScalarExpr(E->getArg(0));
+ Value *SamplerOp = EmitScalarExpr(E->getArg(1));
+ Value *CoordOp = EmitScalarExpr(E->getArg(2));
+ Value *CompareOp = EmitScalarExpr(E->getArg(3));
+ if (CompareOp->getType() != Builder.getFloatTy())
+ CompareOp = Builder.CreateFPCast(CompareOp, Builder.getFloatTy());
+
+ SmallVector<Value *, 6> Args;
+ Args.push_back(HandleOp);
+ Args.push_back(SamplerOp);
+ Args.push_back(CoordOp);
+ Args.push_back(CompareOp);
+
+ if (CGM.getTarget().getTriple().isDXIL()) {
+ Value *ComponentOp = EmitScalarExpr(E->getArg(4));
+ if (ComponentOp->getType() != Builder.getInt32Ty())
+ ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
+ /*isSigned=*/false);
+ Args.push_back(ComponentOp);
+ }
+
+ Args.push_back(emitHlslOffset(*this, E, 5));
+
+ llvm::Type *RetTy = ConvertType(E->getType());
+ return Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getGatherCmpIntrinsic(), Args);
+ }
case Builtin::BI__builtin_hlsl_resource_load_with_status:
case Builtin::BI__builtin_hlsl_resource_load_with_status_typed: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index aa5fed1ad751..466c809fdef7 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -175,6 +175,8 @@ public:
GENERATE_HLSL_INTRINSIC_FUNCTION(SampleCmpClamp, resource_samplecmp_clamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(SampleCmpLevelZero,
resource_samplecmplevelzero)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(Gather, resource_gather)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GatherCmp, resource_gather_cmp)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding,
resource_handlefrombinding)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding,
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 0658ecc93d88..ad31ecc75b01 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtSYCL.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticSema.h"
@@ -99,6 +100,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::SEHExceptStmtClass:
case Stmt::SEHFinallyStmtClass:
case Stmt::MSDependentExistsStmtClass:
+ case Stmt::UnresolvedSYCLKernelCallStmtClass:
llvm_unreachable("invalid statement class to emit generically");
case Stmt::NullStmtClass:
case Stmt::CompoundStmtClass:
@@ -543,21 +545,7 @@ bool CodeGenFunction::EmitSimpleStmt(const Stmt *S,
EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S));
break;
case Stmt::SYCLKernelCallStmtClass:
- // SYCL kernel call statements are generated as wrappers around the body
- // of functions declared with the sycl_kernel_entry_point attribute. Such
- // functions are used to specify how a SYCL kernel (a function object) is
- // to be invoked; the SYCL kernel call statement contains a transformed
- // variation of the function body and is used to generate a SYCL kernel
- // caller function; a function that serves as the device side entry point
- // used to execute the SYCL kernel. The sycl_kernel_entry_point attributed
- // function is invoked by host code in order to trigger emission of the
- // device side SYCL kernel caller function and to generate metadata needed
- // by SYCL run-time library implementations; the function is otherwise
- // intended to have no effect. As such, the function body is not evaluated
- // as part of the invocation during host compilation (and the function
- // should not be called or emitted during device compilation); the SYCL
- // kernel call statement is thus handled as a null statement for the
- // purpose of code generation.
+ EmitSYCLKernelCallStmt(cast<SYCLKernelCallStmt>(*S));
break;
}
return true;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ae2956eeac57..c3d470b179dc 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3675,6 +3675,8 @@ public:
LValue EmitCoyieldLValue(const CoyieldExpr *E);
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID);
+ void EmitSYCLKernelCallStmt(const SYCLKernelCallStmt &S);
+
void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
void ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
diff --git a/clang/lib/CodeGen/CodeGenSYCL.cpp b/clang/lib/CodeGen/CodeGenSYCL.cpp
index 7d66d96ad0a1..5a52675de299 100644
--- a/clang/lib/CodeGen/CodeGenSYCL.cpp
+++ b/clang/lib/CodeGen/CodeGenSYCL.cpp
@@ -13,10 +13,23 @@
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include <cassert>
using namespace clang;
using namespace CodeGen;
+void CodeGenFunction::EmitSYCLKernelCallStmt(const SYCLKernelCallStmt &S) {
+ // SYCLKernelCallStmt instances are only injected in the definitions of
+ // functions declared with the sycl_kernel_entry_point attribute. ODR-use of
+ // such a function in code emitted during device compilation should be
+ // diagnosed. Thus, any attempt to emit a SYCLKernelCallStmt during device
+ // compilation indicates a missing diagnostic.
+ assert(!getLangOpts().SYCLIsDevice &&
+ "Attempt to emit a SYCL kernel call statement during device"
+ " compilation");
+ EmitStmt(S.getKernelLaunchStmt());
+}
+
static void SetSYCLKernelAttributes(llvm::Function *Fn, CodeGenFunction &CGF) {
// SYCL 2020 device language restrictions require forward progress and
// disallow recursion.
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index bff1ed3d2ec1..72d5cb804011 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -89,6 +89,11 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
+ // Do not emit __oclc_ABI_version references with non-empty environment.
+ if (Cov == CodeObjectVersionKind::COV_None &&
+ CGF.getTarget().getTriple().hasEnvironment())
+ Cov = CodeObjectVersionKind::COV_6;
+
if (Cov == CodeObjectVersionKind::COV_None) {
StringRef Name = "__oclc_ABI_version";
auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 45c717d6c5ba..cdee440a5c60 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -534,6 +534,11 @@ Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
return Builder.CreateAShr(Vec, Shift, name);
}
+//===----------------------------------------------------------------------===//
+// Intrinsic maps
+//
+// Maps that help automate code-generation.
+//===----------------------------------------------------------------------===//
enum {
AddRetType = (1 << 0),
Add1ArgType = (1 << 1),
@@ -556,6 +561,12 @@ enum {
AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
+//===----------------------------------------------------------------------===//
+// Intrinsic maps
+//
+// Maps that help automate code-generation.
+//===----------------------------------------------------------------------===//
+
namespace {
struct ARMVectorIntrinsicInfo {
const char *NameHint;
@@ -1654,6 +1665,8 @@ static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static bool AArch64SMEIntrinsicsProvenSorted = false;
+// Check if Builtin `BuiltinID` is present in `IntrinsicMap`. If yes, returns
+// the corresponding info struct.
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
unsigned BuiltinID, bool &MapProvenSorted) {
@@ -1783,7 +1796,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
const char *NameHint, unsigned Modifier, const CallExpr *E,
SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
llvm::Triple::ArchType Arch) {
- // Get the last argument, which specifies the vector type.
+
+ // Extract the trailing immediate argument that encodes the type discriminator
+ // for this overloaded intrinsic.
+ // TODO: Move to the parent code that takes care of argument processing.
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
std::optional<llvm::APSInt> NeonTypeConst =
Arg->getIntegerConstantExpr(getContext());
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index afa1884d94b7..420340aaab88 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3082,7 +3082,7 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
InputTypeArg->claim();
// stdin must be handled specially.
- if (memcmp(Value, "-", 2) == 0) {
+ if (strcmp(Value, "-") == 0) {
if (IsFlangMode()) {
Ty = types::TY_Fortran;
} else if (IsDXCMode()) {
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index dd774f7319bb..60914d9b2cbc 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -1214,6 +1214,9 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompileImpl(
// Make a copy for the new instance.
Instance.FailedModules = FailedModules;
+ // Pass along the GenModuleActionWrapper callback.
+ Instance.setGenModuleActionWrapper(getGenModuleActionWrapper());
+
if (GetDependencyDirectives)
Instance.GetDependencyDirectives =
GetDependencyDirectives->cloneFor(Instance.getFileManager());
@@ -1268,8 +1271,14 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc,
// thread so that we get a stack large enough.
bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack(
[&]() {
- GenerateModuleFromModuleMapAction Action;
- Instance.ExecuteAction(Action);
+ std::unique_ptr<FrontendAction> Action =
+ std::make_unique<GenerateModuleFromModuleMapAction>();
+
+ if (auto WrapGenModuleAction = Instance.getGenModuleActionWrapper())
+ Action = WrapGenModuleAction(Instance.getFrontendOpts(),
+ std::move(Action));
+
+ Instance.ExecuteAction(*Action);
},
DesiredStackSize);
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index f03b14058db4..492f7b1742be 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -476,6 +476,10 @@ private:
return "TypeAliasTemplateInstantiation";
case CodeSynthesisContext::PartialOrderingTTP:
return "PartialOrderingTTP";
+ case CodeSynthesisContext::SYCLKernelLaunchLookup:
+ return "SYCLKernelLaunchLookup";
+ case CodeSynthesisContext::SYCLKernelLaunchOverloadResolution:
+ return "SYCLKernelLaunchOverloadResolution";
}
return "";
}
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 91b34838d572..f99c16c8fe92 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -16,6 +16,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/HLSLResource.h"
#include "clang/AST/Stmt.h"
@@ -1304,7 +1305,7 @@ BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addSampleMethods(ResourceDimension Dim) {
assert(!Record->isCompleteDefinition() && "record is already complete");
ASTContext &AST = Record->getASTContext();
- QualType ReturnType = getFirstTemplateTypeParam();
+ QualType ReturnType = getHandleElementType();
QualType SamplerStateType =
lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext());
uint32_t VecSize = getResourceDimensions(Dim);
@@ -1352,7 +1353,7 @@ BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addSampleBiasMethods(ResourceDimension Dim) {
assert(!Record->isCompleteDefinition() && "record is already complete");
ASTContext &AST = Record->getASTContext();
- QualType ReturnType = getFirstTemplateTypeParam();
+ QualType ReturnType = getHandleElementType();
QualType SamplerStateType =
lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext());
uint32_t VecSize = getResourceDimensions(Dim);
@@ -1404,7 +1405,7 @@ BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addSampleGradMethods(ResourceDimension Dim) {
assert(!Record->isCompleteDefinition() && "record is already complete");
ASTContext &AST = Record->getASTContext();
- QualType ReturnType = getFirstTemplateTypeParam();
+ QualType ReturnType = getHandleElementType();
QualType SamplerStateType =
lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext());
uint32_t VecSize = getResourceDimensions(Dim);
@@ -1461,7 +1462,7 @@ BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addSampleLevelMethods(ResourceDimension Dim) {
assert(!Record->isCompleteDefinition() && "record is already complete");
ASTContext &AST = Record->getASTContext();
- QualType ReturnType = getFirstTemplateTypeParam();
+ QualType ReturnType = getHandleElementType();
QualType SamplerStateType =
lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext());
uint32_t VecSize = getResourceDimensions(Dim);
@@ -1588,6 +1589,129 @@ BuiltinTypeDeclBuilder::addSampleCmpLevelZeroMethods(ResourceDimension Dim) {
.finalize();
}
+QualType BuiltinTypeDeclBuilder::getGatherReturnType() {
+ ASTContext &AST = SemaRef.getASTContext();
+ QualType T = getHandleElementType();
+ if (T.isNull())
+ return QualType();
+
+ if (const auto *VT = T->getAs<VectorType>())
+ T = VT->getElementType();
+ else if (const auto *DT = T->getAs<DependentSizedExtVectorType>())
+ T = DT->getElementType();
+
+ return AST.getExtVectorType(T, 4);
+}
+
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addGatherMethods(ResourceDimension Dim) {
+ assert(!Record->isCompleteDefinition() && "record is already complete");
+ ASTContext &AST = Record->getASTContext();
+ QualType ReturnType = getGatherReturnType();
+
+ QualType SamplerStateType =
+ lookupBuiltinType(SemaRef, "SamplerState", Record->getDeclContext());
+ uint32_t VecSize = getResourceDimensions(Dim);
+ QualType LocationTy = AST.FloatTy;
+ QualType Float2Ty = AST.getExtVectorType(LocationTy, VecSize);
+ QualType IntTy = AST.IntTy;
+ QualType OffsetTy = AST.getExtVectorType(IntTy, VecSize);
+ using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+
+ // Overloads for Gather, GatherRed, GatherGreen, GatherBlue, GatherAlpha
+ struct GatherVariant {
+ const char *Name;
+ int Component;
+ };
+ GatherVariant Variants[] = {{"Gather", 0},
+ {"GatherRed", 0},
+ {"GatherGreen", 1},
+ {"GatherBlue", 2},
+ {"GatherAlpha", 3}};
+
+ for (const auto &V : Variants) {
+ // ret GatherVariant(SamplerState s, float2 location)
+ BuiltinTypeMethodBuilder(*this, V.Name, ReturnType)
+ .addParam("Sampler", SamplerStateType)
+ .addParam("Location", Float2Ty)
+ .accessHandleFieldOnResource(PH::_0)
+ .callBuiltin("__builtin_hlsl_resource_gather", ReturnType, PH::Handle,
+ PH::LastStmt, PH::_1,
+ getConstantUnsignedIntExpr(V.Component))
+ .finalize();
+
+ // ret GatherVariant(SamplerState s, float2 location, int2 offset)
+ BuiltinTypeMethodBuilder(*this, V.Name, ReturnType)
+ .addParam("Sampler", SamplerStateType)
+ .addParam("Location", Float2Ty)
+ .addParam("Offset", OffsetTy)
+ .accessHandleFieldOnResource(PH::_0)
+ .callBuiltin("__builtin_hlsl_resource_gather", ReturnType, PH::Handle,
+ PH::LastStmt, PH::_1,
+ getConstantUnsignedIntExpr(V.Component), PH::_2)
+ .finalize();
+ }
+
+ return *this;
+}
+
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addGatherCmpMethods(ResourceDimension Dim) {
+ assert(!Record->isCompleteDefinition() && "record is already complete");
+ ASTContext &AST = Record->getASTContext();
+ QualType ReturnType = AST.getExtVectorType(AST.FloatTy, 4);
+
+ QualType SamplerComparisonStateType = lookupBuiltinType(
+ SemaRef, "SamplerComparisonState", Record->getDeclContext());
+ uint32_t VecSize = getResourceDimensions(Dim);
+ QualType FloatTy = AST.FloatTy;
+ QualType Float2Ty = AST.getExtVectorType(FloatTy, VecSize);
+ QualType IntTy = AST.IntTy;
+ QualType Int2Ty = AST.getExtVectorType(IntTy, VecSize);
+ using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+
+ // Overloads for GatherCmp, GatherCmpRed, GatherCmpGreen, GatherCmpBlue,
+ // GatherCmpAlpha
+ struct GatherVariant {
+ const char *Name;
+ int Component;
+ };
+ GatherVariant Variants[] = {{"GatherCmp", 0},
+ {"GatherCmpRed", 0},
+ {"GatherCmpGreen", 1},
+ {"GatherCmpBlue", 2},
+ {"GatherCmpAlpha", 3}};
+
+ for (const auto &V : Variants) {
+ // ret GatherCmpVariant(SamplerComparisonState s, float2 location, float
+ // compare_value)
+ BuiltinTypeMethodBuilder(*this, V.Name, ReturnType)
+ .addParam("Sampler", SamplerComparisonStateType)
+ .addParam("Location", Float2Ty)
+ .addParam("CompareValue", FloatTy)
+ .accessHandleFieldOnResource(PH::_0)
+ .callBuiltin("__builtin_hlsl_resource_gather_cmp", ReturnType,
+ PH::Handle, PH::LastStmt, PH::_1, PH::_2,
+ getConstantUnsignedIntExpr(V.Component))
+ .finalize();
+
+ // ret GatherCmpVariant(SamplerComparisonState s, float2 location, float
+ // compare_value, int2 offset)
+ BuiltinTypeMethodBuilder(*this, V.Name, ReturnType)
+ .addParam("Sampler", SamplerComparisonStateType)
+ .addParam("Location", Float2Ty)
+ .addParam("CompareValue", FloatTy)
+ .addParam("Offset", Int2Ty)
+ .accessHandleFieldOnResource(PH::_0)
+ .callBuiltin("__builtin_hlsl_resource_gather_cmp", ReturnType,
+ PH::Handle, PH::LastStmt, PH::_1, PH::_2,
+ getConstantUnsignedIntExpr(V.Component), PH::_3)
+ .finalize();
+ }
+
+ return *this;
+}
+
FieldDecl *BuiltinTypeDeclBuilder::getResourceHandleField() const {
auto I = Fields.find("__handle");
assert(I != Fields.end() &&
@@ -1616,6 +1740,14 @@ QualType BuiltinTypeDeclBuilder::getFirstTemplateTypeParam() {
QualType BuiltinTypeDeclBuilder::getHandleElementType() {
if (Template)
return getFirstTemplateTypeParam();
+
+ if (auto *PartialSpec =
+ dyn_cast<ClassTemplatePartialSpecializationDecl>(Record)) {
+ const auto &Args = PartialSpec->getTemplateArgs();
+ if (Args.size() > 0 && Args[0].getKind() == TemplateArgument::Type)
+ return Args[0].getAsType();
+ }
+
// TODO: Should we default to VoidTy? Using `i8` is arguably ambiguous.
return SemaRef.getASTContext().Char8Ty;
}
@@ -1642,6 +1774,13 @@ Expr *BuiltinTypeDeclBuilder::getConstantIntExpr(int value) {
SourceLocation());
}
+Expr *BuiltinTypeDeclBuilder::getConstantUnsignedIntExpr(unsigned value) {
+ ASTContext &AST = SemaRef.getASTContext();
+ return IntegerLiteral::Create(
+ AST, llvm::APInt(AST.getTypeSize(AST.UnsignedIntTy), value),
+ AST.UnsignedIntTy, SourceLocation());
+}
+
BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addSimpleTemplateParams(ArrayRef<StringRef> Names,
ConceptDecl *CD = nullptr) {
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
index fcb61731c541..c27ff30c6ff7 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
@@ -100,6 +100,8 @@ public:
BuiltinTypeDeclBuilder &addSampleLevelMethods(ResourceDimension Dim);
BuiltinTypeDeclBuilder &addSampleCmpMethods(ResourceDimension Dim);
BuiltinTypeDeclBuilder &addSampleCmpLevelZeroMethods(ResourceDimension Dim);
+ BuiltinTypeDeclBuilder &addGatherMethods(ResourceDimension Dim);
+ BuiltinTypeDeclBuilder &addGatherCmpMethods(ResourceDimension Dim);
BuiltinTypeDeclBuilder &addIncrementCounterMethod();
BuiltinTypeDeclBuilder &addDecrementCounterMethod();
BuiltinTypeDeclBuilder &addHandleAccessFunction(DeclarationName &Name,
@@ -132,11 +134,13 @@ private:
BuiltinTypeDeclBuilder &
addCounterHandleMember(ResourceClass RC, bool IsROV, bool RawBuffer,
AccessSpecifier Access = AccessSpecifier::AS_private);
+ QualType getGatherReturnType();
FieldDecl *getResourceHandleField() const;
FieldDecl *getResourceCounterHandleField() const;
QualType getFirstTemplateTypeParam();
QualType getHandleElementType();
Expr *getConstantIntExpr(int value);
+ Expr *getConstantUnsignedIntExpr(unsigned value);
HLSLAttributedResourceType::Attributes getResourceAttrs() const;
};
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 662627901539..788a129ec539 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -15,12 +15,14 @@
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaHLSL.h"
+#include "clang/Sema/TemplateDeduction.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;
@@ -265,7 +267,72 @@ static BuiltinTypeDeclBuilder setupTextureType(CXXRecordDecl *Decl, Sema &S,
.addSampleGradMethods(Dim)
.addSampleLevelMethods(Dim)
.addSampleCmpMethods(Dim)
- .addSampleCmpLevelZeroMethods(Dim);
+ .addSampleCmpLevelZeroMethods(Dim)
+ .addGatherMethods(Dim)
+ .addGatherCmpMethods(Dim);
+}
+
+// Add a partial specialization for a template. The `TextureTemplate` is
+// `Texture<element_type>`, and it will be specialized for vectors:
+// `Texture<vector<element_type, element_count>>`.
+static ClassTemplatePartialSpecializationDecl *
+addVectorTexturePartialSpecialization(Sema &S, NamespaceDecl *HLSLNamespace,
+ ClassTemplateDecl *TextureTemplate) {
+ ASTContext &AST = S.getASTContext();
+
+ // Create the template parameters: element_type and element_count.
+ auto *ElementType = TemplateTypeParmDecl::Create(
+ AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 0,
+ &AST.Idents.get("element_type"), false, false);
+ auto *ElementCount = NonTypeTemplateParmDecl::Create(
+ AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 1,
+ &AST.Idents.get("element_count"), AST.IntTy, false,
+ AST.getTrivialTypeSourceInfo(AST.IntTy));
+
+ auto *TemplateParams = TemplateParameterList::Create(
+ AST, SourceLocation(), SourceLocation(), {ElementType, ElementCount},
+ SourceLocation(), nullptr);
+
+ // Create the dependent vector type: vector<element_type, element_count>.
+ QualType VectorType = AST.getDependentSizedExtVectorType(
+ AST.getTemplateTypeParmType(0, 0, false, ElementType),
+ DeclRefExpr::Create(
+ AST, NestedNameSpecifierLoc(), SourceLocation(), ElementCount, false,
+ DeclarationNameInfo(ElementCount->getDeclName(), SourceLocation()),
+ AST.IntTy, VK_LValue),
+ SourceLocation());
+
+ // Create the partial specialization declaration.
+ QualType CanonInjectedTST =
+ AST.getCanonicalType(AST.getTemplateSpecializationType(
+ ElaboratedTypeKeyword::Class, TemplateName(TextureTemplate),
+ {TemplateArgument(VectorType)}, {}));
+
+ auto *PartialSpec = ClassTemplatePartialSpecializationDecl::Create(
+ AST, TagDecl::TagKind::Class, HLSLNamespace, SourceLocation(),
+ SourceLocation(), TemplateParams, TextureTemplate,
+ {TemplateArgument(VectorType)},
+ CanQualType::CreateUnsafe(CanonInjectedTST), nullptr);
+
+ // Set the template arguments as written.
+ TemplateArgument Arg(VectorType);
+ TemplateArgumentLoc ArgLoc =
+ S.getTrivialTemplateArgumentLoc(Arg, QualType(), SourceLocation());
+ TemplateArgumentListInfo ArgsInfo =
+ TemplateArgumentListInfo(SourceLocation(), SourceLocation());
+ ArgsInfo.addArgument(ArgLoc);
+ PartialSpec->setTemplateArgsAsWritten(
+ ASTTemplateArgumentListInfo::Create(AST, ArgsInfo));
+
+ PartialSpec->setImplicit(true);
+ PartialSpec->setLexicalDeclContext(HLSLNamespace);
+ PartialSpec->setHasExternalLexicalStorage();
+
+ // Add the partial specialization to the namespace and the class template.
+ HLSLNamespace->addDecl(PartialSpec);
+ TextureTemplate->AddPartialSpecialization(PartialSpec, nullptr);
+
+ return PartialSpec;
}
// This function is responsible for constructing the constraint expression for
@@ -548,11 +615,20 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "Texture2D")
.addSimpleTemplateParams({"element_type"}, TypedBufferConcept)
.finalizeForwardDeclaration();
+
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupTextureType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false,
ResourceDimension::Dim2D)
.completeDefinition();
});
+
+ auto *PartialSpec = addVectorTexturePartialSpecialization(
+ *SemaPtr, HLSLNamespace, Decl->getDescribedClassTemplate());
+ onCompletion(PartialSpec, [this](CXXRecordDecl *Decl) {
+ setupTextureType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false,
+ ResourceDimension::Dim2D)
+ .completeDefinition();
+ });
}
void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record,
@@ -568,8 +644,27 @@ void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) {
// If this is a specialization, we need to get the underlying templated
// declaration and complete that.
- if (auto TDecl = dyn_cast<ClassTemplateSpecializationDecl>(Record))
- Record = TDecl->getSpecializedTemplate()->getTemplatedDecl();
+ if (auto TDecl = dyn_cast<ClassTemplateSpecializationDecl>(Record)) {
+ if (!isa<ClassTemplatePartialSpecializationDecl>(TDecl)) {
+ ClassTemplateDecl *Template = TDecl->getSpecializedTemplate();
+ llvm::SmallVector<ClassTemplatePartialSpecializationDecl *, 4> Partials;
+ Template->getPartialSpecializations(Partials);
+ ClassTemplatePartialSpecializationDecl *MatchedPartial = nullptr;
+ for (auto *Partial : Partials) {
+ sema::TemplateDeductionInfo Info(TDecl->getLocation());
+ if (SemaPtr->DeduceTemplateArguments(Partial, TDecl->getTemplateArgs(),
+ Info) ==
+ TemplateDeductionResult::Success) {
+ MatchedPartial = Partial;
+ break;
+ }
+ }
+ if (MatchedPartial)
+ Record = MatchedPartial;
+ else
+ Record = Template->getTemplatedDecl();
+ }
+ }
Record = Record->getCanonicalDecl();
auto It = Completions.find(Record);
if (It == Completions.end())
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index be84974c70f2..405832a446e1 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -16360,6 +16360,32 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
maybeAddDeclWithEffects(FD);
+ if (FD && !FD->isInvalidDecl() && FD->hasAttr<SYCLKernelEntryPointAttr>() &&
+ FnBodyScope) {
+ // An implicit call expression is synthesized for functions declared with
+ // the sycl_kernel_entry_point attribute. The call may resolve to a
+ // function template, a member function template, or a call operator
+ // of a variable template depending on the results of unqualified lookup
+ // for 'sycl_kernel_launch' from the beginning of the function body.
+ // Performing that lookup requires the stack of parsing scopes active
+ // when the definition is parsed and is thus done here; the result is
+ // cached in FunctionScopeInfo and used to synthesize the (possibly
+ // unresolved) call expression after the function body has been parsed.
+ const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>();
+ if (!SKEPAttr->isInvalidAttr()) {
+ ExprResult LaunchIdExpr =
+ SYCL().BuildSYCLKernelLaunchIdExpr(FD, SKEPAttr->getKernelName());
+ // Do not mark 'FD' as invalid if construction of 'LaunchIdExpr' produces
+ // an invalid result. Name lookup failure for 'sycl_kernel_launch' is
+ // treated as an error in the definition of 'FD'; treating it as an error
+ // of the declaration would affect overload resolution which would
+ // potentially result in additional errors. If construction of
+ // 'LaunchIdExpr' failed, then 'SYCLKernelLaunchIdExpr' will be assigned
+ // a null pointer value below; that is expected.
+ getCurFunction()->SYCLKernelLaunchIdExpr = LaunchIdExpr.get();
+ }
+ }
+
return D;
}
@@ -16561,12 +16587,37 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, bool IsInstantiation,
SKEPAttr->setInvalidAttr();
}
- if (Body && !FD->isTemplated() && !SKEPAttr->isInvalidAttr()) {
- StmtResult SR =
- SYCL().BuildSYCLKernelCallStmt(FD, cast<CompoundStmt>(Body));
- if (SR.isInvalid())
- return nullptr;
- Body = SR.get();
+ // Build an unresolved SYCL kernel call statement for a function template,
+ // validate that a SYCL kernel call statement was instantiated for an
+ // (implicit or explicit) instantiation of a function template, or otherwise
+ // build a (resolved) SYCL kernel call statement for a non-templated
+ // function or an explicit specialization.
+ if (Body && !SKEPAttr->isInvalidAttr()) {
+ StmtResult SR;
+ if (FD->isTemplateInstantiation()) {
+ // The function body should already be a SYCLKernelCallStmt in this
+ // case, but might not be if there were previous errors.
+ SR = Body;
+ } else if (!getCurFunction()->SYCLKernelLaunchIdExpr) {
+ // If name lookup for a template named sycl_kernel_launch failed
+ // earlier, don't try to build a SYCL kernel call statement as that
+ // would cause additional errors to be issued; just proceed with the
+ // original function body.
+ SR = Body;
+ } else if (FD->isTemplated()) {
+ SR = SYCL().BuildUnresolvedSYCLKernelCallStmt(
+ cast<CompoundStmt>(Body), getCurFunction()->SYCLKernelLaunchIdExpr);
+ } else {
+ SR = SYCL().BuildSYCLKernelCallStmt(
+ FD, cast<CompoundStmt>(Body),
+ getCurFunction()->SYCLKernelLaunchIdExpr);
+ }
+ // If construction of the replacement body fails, just continue with the
+ // original function body. An early error return here is not valid; the
+ // current declaration context and function scopes must be popped before
+ // returning.
+ if (SR.isUsable())
+ Body = SR.get();
}
}
@@ -21037,7 +21088,9 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
// SYCL functions can be template, so we check if they have appropriate
// attribute prior to checking if it is a template.
- if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>())
+ if (LangOpts.SYCLIsDevice && (FD->hasAttr<SYCLKernelAttr>() ||
+ FD->hasAttr<SYCLKernelEntryPointAttr>() ||
+ FD->hasAttr<SYCLExternalAttr>()))
return FunctionEmissionStatus::Emitted;
// Templates are emitted when they're instantiated.
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 8df01a8a616c..56079ea8e1bf 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -15,6 +15,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/StmtObjC.h"
+#include "clang/AST/StmtSYCL.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
@@ -1250,6 +1251,18 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
return CT;
}
+ case Stmt::SYCLKernelCallStmtClass: {
+ auto *SKCS = cast<SYCLKernelCallStmt>(S);
+ if (getLangOpts().SYCLIsDevice)
+ return canSubStmtsThrow(*this,
+ SKCS->getOutlinedFunctionDecl()->getBody());
+ assert(getLangOpts().SYCLIsHost);
+ return canSubStmtsThrow(*this, SKCS->getKernelLaunchStmt());
+ }
+
+ case Stmt::UnresolvedSYCLKernelCallStmtClass:
+ return CT_Dependent;
+
// ObjC message sends are like function calls, but never have exception
// specs.
case Expr::ObjCMessageExprClass:
@@ -1433,7 +1446,6 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Stmt::AttributedStmtClass:
case Stmt::BreakStmtClass:
case Stmt::CapturedStmtClass:
- case Stmt::SYCLKernelCallStmtClass:
case Stmt::CaseStmtClass:
case Stmt::CompoundStmtClass:
case Stmt::ContinueStmtClass:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 53d215f5c5e3..04b3b36aacf6 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -406,6 +406,9 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
targetDiag(*Locs.begin(), diag::err_thread_unsupported);
}
+ if (LangOpts.SYCLIsDevice && isa<FunctionDecl>(D))
+ SYCL().CheckDeviceUseOfDecl(D, Loc);
+
return false;
}
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index f3e672642816..5701b76427d6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3326,6 +3326,121 @@ static bool CheckVectorElementCount(Sema *S, QualType PassedType,
enum class SampleKind { Sample, Bias, Grad, Level, Cmp, CmpLevelZero };
+static bool CheckTextureSamplerAndLocation(Sema &S, CallExpr *TheCall) {
+ // Check the texture handle.
+ if (CheckResourceHandle(&S, TheCall, 0,
+ [](const HLSLAttributedResourceType *ResType) {
+ return ResType->getAttrs().ResourceDimension ==
+ llvm::dxil::ResourceDimension::Unknown;
+ }))
+ return true;
+
+ // Check the sampler handle.
+ if (CheckResourceHandle(&S, TheCall, 1,
+ [](const HLSLAttributedResourceType *ResType) {
+ return ResType->getAttrs().ResourceClass !=
+ llvm::hlsl::ResourceClass::Sampler;
+ }))
+ return true;
+
+ auto *ResourceTy =
+ TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>();
+
+ // Check the location.
+ unsigned ExpectedDim =
+ getResourceDimensions(ResourceTy->getAttrs().ResourceDimension);
+ if (CheckVectorElementCount(&S, TheCall->getArg(2)->getType(),
+ S.Context.FloatTy, ExpectedDim,
+ TheCall->getBeginLoc()))
+ return true;
+
+ return false;
+}
+
+static bool CheckGatherBuiltin(Sema &S, CallExpr *TheCall, bool IsCmp) {
+ if (S.checkArgCountRange(TheCall, IsCmp ? 5 : 4, IsCmp ? 6 : 5))
+ return true;
+
+ if (CheckTextureSamplerAndLocation(S, TheCall))
+ return true;
+
+ unsigned NextIdx = 3;
+ if (IsCmp) {
+ // Check the compare value.
+ QualType CmpTy = TheCall->getArg(NextIdx)->getType();
+ if (!CmpTy->isFloatingType() || CmpTy->isVectorType()) {
+ S.Diag(TheCall->getArg(NextIdx)->getBeginLoc(),
+ diag::err_typecheck_convert_incompatible)
+ << CmpTy << S.Context.FloatTy << 1 << 0 << 0;
+ return true;
+ }
+ NextIdx++;
+ }
+
+ // Check the component operand.
+ Expr *ComponentArg = TheCall->getArg(NextIdx);
+ QualType ComponentTy = ComponentArg->getType();
+ if (!ComponentTy->isIntegerType() || ComponentTy->isVectorType()) {
+ S.Diag(ComponentArg->getBeginLoc(),
+ diag::err_typecheck_convert_incompatible)
+ << ComponentTy << S.Context.UnsignedIntTy << 1 << 0 << 0;
+ return true;
+ }
+
+ // GatherCmp operations on Vulkan target must use component 0 (Red).
+ if (IsCmp && S.getASTContext().getTargetInfo().getTriple().isSPIRV()) {
+ std::optional<llvm::APSInt> ComponentOpt =
+ ComponentArg->getIntegerConstantExpr(S.getASTContext());
+ if (ComponentOpt) {
+ int64_t ComponentVal = ComponentOpt->getSExtValue();
+ if (ComponentVal != 0) {
+ // Issue an error if the component is not 0 (Red).
+ // 0 -> Red, 1 -> Green, 2 -> Blue, 3 -> Alpha
+ assert(ComponentVal >= 0 && ComponentVal <= 3 &&
+ "The component is not in the expected range.");
+ S.Diag(ComponentArg->getBeginLoc(),
+ diag::err_hlsl_gathercmp_invalid_component)
+ << ComponentVal;
+ return true;
+ }
+ }
+ }
+
+ NextIdx++;
+
+ // Check the offset operand.
+ const HLSLAttributedResourceType *ResourceTy =
+ TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>();
+ if (TheCall->getNumArgs() > NextIdx) {
+ unsigned ExpectedDim =
+ getResourceDimensions(ResourceTy->getAttrs().ResourceDimension);
+ if (CheckVectorElementCount(&S, TheCall->getArg(NextIdx)->getType(),
+ S.Context.IntTy, ExpectedDim,
+ TheCall->getArg(NextIdx)->getBeginLoc()))
+ return true;
+ NextIdx++;
+ }
+
+ assert(ResourceTy->hasContainedType() &&
+ "Expecting a contained type for resource with a dimension "
+ "attribute.");
+ QualType ReturnType = ResourceTy->getContainedType();
+
+ if (IsCmp) {
+ if (!ReturnType->hasFloatingRepresentation()) {
+ S.Diag(TheCall->getBeginLoc(), diag::err_hlsl_samplecmp_requires_float);
+ return true;
+ }
+ }
+
+ if (const auto *VecTy = ReturnType->getAs<VectorType>())
+ ReturnType = VecTy->getElementType();
+ ReturnType = S.Context.getExtVectorType(ReturnType, 4);
+
+ TheCall->setType(ReturnType);
+
+ return false;
+}
static bool CheckSamplingBuiltin(Sema &S, CallExpr *TheCall, SampleKind Kind) {
unsigned MinArgs, MaxArgs;
if (Kind == SampleKind::Sample) {
@@ -3352,32 +3467,13 @@ static bool CheckSamplingBuiltin(Sema &S, CallExpr *TheCall, SampleKind Kind) {
if (S.checkArgCountRange(TheCall, MinArgs, MaxArgs))
return true;
- // Check the texture handle.
- if (CheckResourceHandle(&S, TheCall, 0,
- [](const HLSLAttributedResourceType *ResType) {
- return ResType->getAttrs().ResourceDimension ==
- llvm::dxil::ResourceDimension::Unknown;
- }))
- return true;
-
- // Check the sampler handle.
- if (CheckResourceHandle(&S, TheCall, 1,
- [](const HLSLAttributedResourceType *ResType) {
- return ResType->getAttrs().ResourceClass !=
- llvm::hlsl::ResourceClass::Sampler;
- }))
+ if (CheckTextureSamplerAndLocation(S, TheCall))
return true;
- auto *ResourceTy =
+ const HLSLAttributedResourceType *ResourceTy =
TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>();
-
- // Check the location.
unsigned ExpectedDim =
getResourceDimensions(ResourceTy->getAttrs().ResourceDimension);
- if (CheckVectorElementCount(&S, TheCall->getArg(2)->getType(),
- S.Context.FloatTy, ExpectedDim,
- TheCall->getBeginLoc()))
- return true;
unsigned NextIdx = 3;
if (Kind == SampleKind::Bias || Kind == SampleKind::Level ||
@@ -3577,6 +3673,10 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return CheckSamplingBuiltin(SemaRef, TheCall, SampleKind::Cmp);
case Builtin::BI__builtin_hlsl_resource_sample_cmp_level_zero:
return CheckSamplingBuiltin(SemaRef, TheCall, SampleKind::CmpLevelZero);
+ case Builtin::BI__builtin_hlsl_resource_gather:
+ return CheckGatherBuiltin(SemaRef, TheCall, /*IsCmp=*/false);
+ case Builtin::BI__builtin_hlsl_resource_gather_cmp:
+ return CheckGatherBuiltin(SemaRef, TheCall, /*IsCmp=*/true);
case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
assert(TheCall->getNumArgs() == 1 && "expected 1 arg");
// Update return type to be the attributed resource type from arg0.
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index 280f9b1a4b42..ff8ad61aa3af 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -30,15 +30,25 @@ SemaSYCL::SemaSYCL(Sema &S) : SemaBase(S) {}
Sema::SemaDiagnosticBuilder SemaSYCL::DiagIfDeviceCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().SYCLIsDevice &&
- "Should only be called during SYCL compilation");
- FunctionDecl *FD = dyn_cast<FunctionDecl>(SemaRef.getCurLexicalContext());
- SemaDiagnosticBuilder::Kind DiagKind = [this, FD] {
- if (!FD)
- return SemaDiagnosticBuilder::K_Nop;
- if (SemaRef.getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted)
- return SemaDiagnosticBuilder::K_ImmediateWithCallStack;
- return SemaDiagnosticBuilder::K_Deferred;
- }();
+ "Device diagnostics Should only be issued during device compilation");
+ SemaDiagnosticBuilder::Kind DiagKind = SemaDiagnosticBuilder::K_Nop;
+ FunctionDecl *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
+ if (FD) {
+ Sema::FunctionEmissionStatus FES = SemaRef.getEmissionStatus(FD);
+ switch (FES) {
+ case Sema::FunctionEmissionStatus::Emitted:
+ DiagKind = SemaDiagnosticBuilder::K_ImmediateWithCallStack;
+ break;
+ case Sema::FunctionEmissionStatus::Unknown:
+ case Sema::FunctionEmissionStatus::TemplateDiscarded:
+ DiagKind = SemaDiagnosticBuilder::K_Deferred;
+ break;
+ case Sema::FunctionEmissionStatus::OMPDiscarded:
+ llvm_unreachable("OMPDiscarded unexpected in SYCL device compilation");
+ case Sema::FunctionEmissionStatus::CUDADiscarded:
+ llvm_unreachable("CUDADiscarded unexpected in SYCL device compilation");
+ }
+ }
return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, SemaRef);
}
@@ -211,6 +221,23 @@ void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) {
SYCLKernelEntryPointAttr(SemaRef.Context, AL, TSI));
}
+void SemaSYCL::CheckDeviceUseOfDecl(NamedDecl *ND, SourceLocation Loc) {
+ assert(getLangOpts().SYCLIsDevice &&
+ "Should only be called during SYCL device compilation");
+
+ // Function declarations with the sycl_kernel_entry_point attribute cannot
+ // be ODR-used in a potentially evaluated context.
+ if (FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
+ if (const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>()) {
+ if (SemaRef.currentEvaluationContext().isPotentiallyEvaluated()) {
+ DiagIfDeviceCode(Loc, diag::err_sycl_entry_point_device_use)
+ << FD << SKEPAttr;
+ DiagIfDeviceCode(SKEPAttr->getLocation(), diag::note_attribute) << FD;
+ }
+ }
+ }
+}
+
// Given a potentially qualified type, SourceLocationForUserDeclaredType()
// returns the source location of the canonical declaration of the unqualified
// desugared user declared type, if any. For non-user declared types, an
@@ -315,10 +342,20 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
}
}
+ if (isa<CXXConstructorDecl>(FD)) {
+ Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+ << SKEPAttr << diag::InvalidSKEPReason::Constructor;
+ SKEPAttr->setInvalidAttr();
+ }
+ if (isa<CXXDestructorDecl>(FD)) {
+ Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+ << SKEPAttr << diag::InvalidSKEPReason::Destructor;
+ SKEPAttr->setInvalidAttr();
+ }
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
- if (!MD->isStatic()) {
+ if (MD->isExplicitObjectMemberFunction()) {
Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
- << SKEPAttr << diag::InvalidSKEPReason::NonStaticMemberFn;
+ << SKEPAttr << diag::InvalidSKEPReason::ExplicitObjectFn;
SKEPAttr->setInvalidAttr();
}
}
@@ -387,8 +424,165 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
}
}
+ExprResult SemaSYCL::BuildSYCLKernelLaunchIdExpr(FunctionDecl *FD,
+ QualType KNT) {
+ // The current context must be the function definition context to ensure
+ // that name lookup is performed within the correct scope.
+ assert(SemaRef.CurContext == FD && "The current declaration context does not "
+ "match the requested function context");
+
+ // An appropriate source location is required to emit diagnostics if
+ // lookup fails to produce an overload set. The desired location is the
+ // start of the function body, but that is not yet available since the
+ // body of the function has not yet been set when this function is called.
+ // The general location of the function is used instead.
+ SourceLocation Loc = FD->getLocation();
+
+ ASTContext &Ctx = SemaRef.getASTContext();
+ IdentifierInfo &SYCLKernelLaunchID =
+ Ctx.Idents.get("sycl_kernel_launch", tok::TokenKind::identifier);
+
+ // Establish a code synthesis context for the implicit name lookup of
+ // a template named 'sycl_kernel_launch'. In the event of an error, this
+ // ensures an appropriate diagnostic note is issued to explain why the
+ // lookup was performed.
+ Sema::CodeSynthesisContext CSC;
+ CSC.Kind = Sema::CodeSynthesisContext::SYCLKernelLaunchLookup;
+ CSC.Entity = FD;
+ Sema::ScopedCodeSynthesisContext ScopedCSC(SemaRef, CSC);
+
+ // Perform ordinary name lookup for a function or variable template that
+ // accepts a single type template argument.
+ LookupResult Result(SemaRef, &SYCLKernelLaunchID, Loc,
+ Sema::LookupOrdinaryName);
+ CXXScopeSpec EmptySS;
+ if (SemaRef.LookupTemplateName(Result, SemaRef.getCurScope(), EmptySS,
+ /*ObjectType*/ QualType(),
+ /*EnteringContext*/ false,
+ Sema::TemplateNameIsRequired))
+ return ExprError();
+ if (Result.isAmbiguous())
+ return ExprError();
+
+ TemplateArgumentListInfo TALI{Loc, Loc};
+ TemplateArgument KNTA = TemplateArgument(KNT);
+ TemplateArgumentLoc TAL =
+ SemaRef.getTrivialTemplateArgumentLoc(KNTA, QualType(), Loc);
+ TALI.addArgument(TAL);
+
+ ExprResult IdExpr;
+ if (SemaRef.isPotentialImplicitMemberAccess(EmptySS, Result,
+ /*IsAddressOfOperand*/ false)) {
+ // The lookup result allows for a possible implicit member access that
+ // would require an implicit or explicit 'this' argument.
+ IdExpr = SemaRef.BuildPossibleImplicitMemberExpr(
+ EmptySS, SourceLocation(), Result, &TALI, SemaRef.getCurScope());
+ } else {
+ IdExpr = SemaRef.BuildTemplateIdExpr(EmptySS, SourceLocation(), Result,
+ /*RequiresADL*/ true, &TALI);
+ }
+
+ // The resulting expression may be invalid if, for example, 'FD' is a
+ // non-static member function and sycl_kernel_launch lookup selects a
+ // member function (which would require a 'this' argument which is
+ // not available).
+ if (IdExpr.isInvalid())
+ return ExprError();
+
+ return IdExpr;
+}
+
namespace {
+// Constructs the arguments to be passed for the SYCL kernel launch call.
+// The first argument is a string literal that contains the SYCL kernel
+// name. The remaining arguments are the parameters of 'FD' passed as
+// move-eligible xvalues. Returns true on error and false otherwise.
+bool BuildSYCLKernelLaunchCallArgs(Sema &SemaRef, FunctionDecl *FD,
+ const SYCLKernelInfo *SKI,
+ SmallVectorImpl<Expr *> &Args,
+ SourceLocation Loc) {
+ // The current context must be the function definition context to ensure
+ // that parameter references occur within the correct scope.
+ assert(SemaRef.CurContext == FD && "The current declaration context does not "
+ "match the requested function context");
+
+ // Prepare a string literal that contains the kernel name.
+ ASTContext &Ctx = SemaRef.getASTContext();
+ const std::string &KernelName = SKI->GetKernelName();
+ QualType KernelNameCharTy = Ctx.CharTy.withConst();
+ llvm::APInt KernelNameSize(Ctx.getTypeSize(Ctx.getSizeType()),
+ KernelName.size() + 1);
+ QualType KernelNameArrayTy = Ctx.getConstantArrayType(
+ KernelNameCharTy, KernelNameSize, nullptr, ArraySizeModifier::Normal, 0);
+ Expr *KernelNameExpr =
+ StringLiteral::Create(Ctx, KernelName, StringLiteralKind::Ordinary,
+ /*Pascal*/ false, KernelNameArrayTy, Loc);
+ Args.push_back(KernelNameExpr);
+
+ // Forward all parameters of 'FD' to the SYCL kernel launch function as if
+ // by std::move().
+ for (ParmVarDecl *PVD : FD->parameters()) {
+ QualType ParamType = PVD->getOriginalType().getNonReferenceType();
+ ExprResult E = SemaRef.BuildDeclRefExpr(PVD, ParamType, VK_LValue, Loc);
+ if (E.isInvalid())
+ return true;
+ if (!PVD->getType()->isLValueReferenceType())
+ E = ImplicitCastExpr::Create(SemaRef.Context, E.get()->getType(), CK_NoOp,
+ E.get(), nullptr, VK_XValue,
+ FPOptionsOverride());
+ if (E.isInvalid())
+ return true;
+ Args.push_back(E.get());
+ }
+
+ return false;
+}
+
+// Constructs the SYCL kernel launch call.
+StmtResult BuildSYCLKernelLaunchCallStmt(Sema &SemaRef, FunctionDecl *FD,
+ const SYCLKernelInfo *SKI,
+ Expr *IdExpr, SourceLocation Loc) {
+ SmallVector<Stmt *> Stmts;
+ // IdExpr may be null if name lookup failed.
+ if (IdExpr) {
+ llvm::SmallVector<Expr *, 12> Args;
+
+ // Establish a code synthesis context for construction of the arguments
+ // for the implicit call to 'sycl_kernel_launch'.
+ {
+ Sema::CodeSynthesisContext CSC;
+ CSC.Kind = Sema::CodeSynthesisContext::SYCLKernelLaunchLookup;
+ CSC.Entity = FD;
+ Sema::ScopedCodeSynthesisContext ScopedCSC(SemaRef, CSC);
+
+ if (BuildSYCLKernelLaunchCallArgs(SemaRef, FD, SKI, Args, Loc))
+ return StmtError();
+ }
+
+ // Establish a code synthesis context for the implicit call to
+ // 'sycl_kernel_launch'.
+ {
+ Sema::CodeSynthesisContext CSC;
+ CSC.Kind = Sema::CodeSynthesisContext::SYCLKernelLaunchOverloadResolution;
+ CSC.Entity = FD;
+ CSC.CallArgs = Args.data();
+ CSC.NumCallArgs = Args.size();
+ Sema::ScopedCodeSynthesisContext ScopedCSC(SemaRef, CSC);
+
+ ExprResult LaunchResult =
+ SemaRef.BuildCallExpr(SemaRef.getCurScope(), IdExpr, Loc, Args, Loc);
+ if (LaunchResult.isInvalid())
+ return StmtError();
+
+ Stmts.push_back(SemaRef.MaybeCreateExprWithCleanups(LaunchResult).get());
+ }
+ }
+
+ return CompoundStmt::Create(SemaRef.getASTContext(), Stmts,
+ FPOptionsOverride(), Loc, Loc);
+}
+
// The body of a function declared with the [[sycl_kernel_entry_point]]
// attribute is cloned and transformed to substitute references to the original
// function parameters with references to replacement variables that stand in
@@ -399,9 +593,10 @@ class OutlinedFunctionDeclBodyInstantiator
public:
using ParmDeclMap = llvm::DenseMap<ParmVarDecl *, VarDecl *>;
- OutlinedFunctionDeclBodyInstantiator(Sema &S, ParmDeclMap &M)
+ OutlinedFunctionDeclBodyInstantiator(Sema &S, ParmDeclMap &M,
+ FunctionDecl *FD)
: TreeTransform<OutlinedFunctionDeclBodyInstantiator>(S), SemaRef(S),
- MapRef(M) {}
+ MapRef(M), FD(FD) {}
// A new set of AST nodes is always required.
bool AlwaysRebuild() { return true; }
@@ -427,18 +622,62 @@ public:
return DRE;
}
+ // Diagnose CXXThisExpr in a potentially evaluated expression.
+ ExprResult TransformCXXThisExpr(CXXThisExpr *CTE) {
+ if (SemaRef.currentEvaluationContext().isPotentiallyEvaluated()) {
+ SemaRef.Diag(CTE->getExprLoc(), diag::err_sycl_entry_point_invalid_this)
+ << (CTE->isImplicitCXXThis() ? /* implicit */ 1 : /* empty */ 0)
+ << FD->getAttr<SYCLKernelEntryPointAttr>();
+ }
+ return CTE;
+ }
+
private:
Sema &SemaRef;
ParmDeclMap &MapRef;
+ FunctionDecl *FD;
};
+OutlinedFunctionDecl *BuildSYCLKernelEntryPointOutline(Sema &SemaRef,
+ FunctionDecl *FD,
+ CompoundStmt *Body) {
+ using ParmDeclMap = OutlinedFunctionDeclBodyInstantiator::ParmDeclMap;
+ ParmDeclMap ParmMap;
+
+ OutlinedFunctionDecl *OFD = OutlinedFunctionDecl::Create(
+ SemaRef.getASTContext(), FD, FD->getNumParams());
+ unsigned i = 0;
+ for (ParmVarDecl *PVD : FD->parameters()) {
+ ImplicitParamDecl *IPD = ImplicitParamDecl::Create(
+ SemaRef.getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(),
+ PVD->getType(), ImplicitParamKind::Other);
+ OFD->setParam(i, IPD);
+ ParmMap[PVD] = IPD;
+ ++i;
+ }
+
+ OutlinedFunctionDeclBodyInstantiator OFDBodyInstantiator(SemaRef, ParmMap,
+ FD);
+ Stmt *OFDBody = OFDBodyInstantiator.TransformStmt(Body).get();
+ OFD->setBody(OFDBody);
+ OFD->setNothrow();
+
+ return OFD;
+}
+
} // unnamed namespace
StmtResult SemaSYCL::BuildSYCLKernelCallStmt(FunctionDecl *FD,
- CompoundStmt *Body) {
+ CompoundStmt *Body,
+ Expr *LaunchIdExpr) {
assert(!FD->isInvalidDecl());
assert(!FD->isTemplated());
assert(FD->hasPrototype());
+ // The current context must be the function definition context to ensure
+ // that name lookup and parameter and local variable creation are performed
+ // within the correct scope.
+ assert(SemaRef.CurContext == FD && "The current declaration context does not "
+ "match the requested function context");
const auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>();
assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute");
@@ -451,29 +690,28 @@ StmtResult SemaSYCL::BuildSYCLKernelCallStmt(FunctionDecl *FD,
getASTContext().getSYCLKernelInfo(SKEPAttr->getKernelName());
assert(declaresSameEntity(SKI.getKernelEntryPointDecl(), FD) &&
"SYCL kernel name conflict");
- (void)SKI;
- using ParmDeclMap = OutlinedFunctionDeclBodyInstantiator::ParmDeclMap;
- ParmDeclMap ParmMap;
-
- assert(SemaRef.CurContext == FD);
+ // Build the outline of the synthesized device entry point function.
OutlinedFunctionDecl *OFD =
- OutlinedFunctionDecl::Create(getASTContext(), FD, FD->getNumParams());
- unsigned i = 0;
- for (ParmVarDecl *PVD : FD->parameters()) {
- ImplicitParamDecl *IPD = ImplicitParamDecl::Create(
- getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(),
- PVD->getType(), ImplicitParamKind::Other);
- OFD->setParam(i, IPD);
- ParmMap[PVD] = IPD;
- ++i;
- }
+ BuildSYCLKernelEntryPointOutline(SemaRef, FD, Body);
+ assert(OFD);
- OutlinedFunctionDeclBodyInstantiator OFDBodyInstantiator(SemaRef, ParmMap);
- Stmt *OFDBody = OFDBodyInstantiator.TransformStmt(Body).get();
- OFD->setBody(OFDBody);
- OFD->setNothrow();
- Stmt *NewBody = new (getASTContext()) SYCLKernelCallStmt(Body, OFD);
+ // Build the host kernel launch statement. An appropriate source location
+ // is required to emit diagnostics.
+ SourceLocation Loc = Body->getLBracLoc();
+ StmtResult LaunchResult =
+ BuildSYCLKernelLaunchCallStmt(SemaRef, FD, &SKI, LaunchIdExpr, Loc);
+ if (LaunchResult.isInvalid())
+ return StmtError();
+
+ Stmt *NewBody =
+ new (getASTContext()) SYCLKernelCallStmt(Body, LaunchResult.get(), OFD);
return NewBody;
}
+
+StmtResult SemaSYCL::BuildUnresolvedSYCLKernelCallStmt(CompoundStmt *Body,
+ Expr *LaunchIdExpr) {
+ return UnresolvedSYCLKernelCallStmt::Create(SemaRef.getASTContext(), Body,
+ LaunchIdExpr);
+}
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index b4d8158525f0..a60d11d8eb36 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -10,7 +10,6 @@
//===----------------------------------------------------------------------===/
#include "TreeTransform.h"
-#include "clang/AST/ASTConcept.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
@@ -593,6 +592,8 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const {
case BuildingDeductionGuides:
case TypeAliasTemplateInstantiation:
case PartialOrderingTTP:
+ case SYCLKernelLaunchLookup:
+ case SYCLKernelLaunchOverloadResolution:
return false;
// This function should never be called when Kind's value is Memoization.
@@ -898,6 +899,26 @@ static std::string convertCallArgsToString(Sema &S,
return Result;
}
+static std::string
+convertCallArgsValueCategoryAndTypeToString(Sema &S,
+ llvm::ArrayRef<const Expr *> Args) {
+ std::string Result;
+ llvm::raw_string_ostream OS(Result);
+ llvm::ListSeparator Comma;
+ OS << "(";
+ for (const Expr *Arg : Args) {
+ ExprValueKind EVK = Arg->getValueKind();
+ const char *ValueCategory =
+ (EVK == VK_LValue ? "lvalue"
+ : (EVK == VK_XValue ? "xvalue" : "prvalue"));
+ OS << Comma << ValueCategory << " of type '";
+ Arg->getType().print(OS, S.getPrintingPolicy());
+ OS << "'";
+ }
+ OS << ")";
+ return Result;
+}
+
void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) {
// Determine which template instantiations to skip, if any.
unsigned SkipStart = CodeSynthesisContexts.size(), SkipEnd = SkipStart;
@@ -1260,6 +1281,33 @@ void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) {
<< /*isTemplateTemplateParam=*/true
<< Active->InstantiationRange);
break;
+ case CodeSynthesisContext::SYCLKernelLaunchLookup: {
+ const auto *SKEPAttr =
+ Active->Entity->getAttr<SYCLKernelEntryPointAttr>();
+ assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute");
+ assert(!SKEPAttr->isInvalidAttr() &&
+ "sycl_kernel_entry_point attribute is invalid");
+ DiagFunc(SKEPAttr->getLocation(), PDiag(diag::note_sycl_runtime_defect));
+ DiagFunc(SKEPAttr->getLocation(),
+ PDiag(diag::note_sycl_kernel_launch_lookup_here)
+ << SKEPAttr->getKernelName());
+ break;
+ }
+ case CodeSynthesisContext::SYCLKernelLaunchOverloadResolution: {
+ const auto *SKEPAttr =
+ Active->Entity->getAttr<SYCLKernelEntryPointAttr>();
+ assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute");
+ assert(!SKEPAttr->isInvalidAttr() &&
+ "sycl_kernel_entry_point attribute is invalid");
+ DiagFunc(SKEPAttr->getLocation(), PDiag(diag::note_sycl_runtime_defect));
+ DiagFunc(SKEPAttr->getLocation(),
+ PDiag(diag::note_sycl_kernel_launch_overload_resolution_here)
+ << SKEPAttr->getKernelName()
+ << convertCallArgsValueCategoryAndTypeToString(
+ *this, llvm::ArrayRef(Active->CallArgs,
+ Active->NumCallArgs)));
+ break;
+ }
}
}
}
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index a416c73c458b..b8442f8fdd9e 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -13077,6 +13077,31 @@ ExprResult TreeTransform<Derived>::TransformSYCLUniqueStableNameExpr(
}
template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformUnresolvedSYCLKernelCallStmt(
+    UnresolvedSYCLKernelCallStmt *S) {
+  // The current context is cast to a FunctionDecl (cast<> asserts otherwise);
+  // it must carry a valid sycl_kernel_entry_point attribute to proceed.
+  auto *EntryPointFD = cast<FunctionDecl>(SemaRef.CurContext);
+  const auto *EntryPointAttr =
+      EntryPointFD->getAttr<SYCLKernelEntryPointAttr>();
+  if (!EntryPointAttr || EntryPointAttr->isInvalidAttr())
+    return StmtError();
+
+  // Transform the kernel launch id expression first, then the original
+  // statement, failing as soon as either transform is invalid.
+  ExprResult LaunchId =
+      getDerived().TransformExpr(S->getKernelLaunchIdExpr());
+  if (LaunchId.isInvalid())
+    return StmtError();
+
+  StmtResult NewBody = getDerived().TransformStmt(S->getOriginalStmt());
+  if (NewBody.isInvalid())
+    return StmtError();
+
+  // Build the SYCL kernel call statement from the transformed pieces.
+  StmtResult Built = SemaRef.SYCL().BuildSYCLKernelCallStmt(
+      cast<FunctionDecl>(SemaRef.CurContext),
+      cast<CompoundStmt>(NewBody.get()), LaunchId.get());
+  return Built.isInvalid() ? StmtError() : Built;
+}
+
+template <typename Derived>
ExprResult TreeTransform<Derived>::TransformCXXReflectExpr(CXXReflectExpr *E) {
// TODO(reflection): Implement its transform
assert(false && "not implemented yet");
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a18fccb6518d..f351e185e5b5 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -543,6 +543,7 @@ void ASTStmtReader::VisitCXXReflectExpr(CXXReflectExpr *E) {
void ASTStmtReader::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *S) {
VisitStmt(S);
S->setOriginalStmt(cast<CompoundStmt>(Record.readSubStmt()));
+ S->setKernelLaunchStmt(cast<Stmt>(Record.readSubStmt())); // Second sub-statement in the record: the kernel launch statement, read after the original compound statement.
S->setOutlinedFunctionDecl(readDeclAs<OutlinedFunctionDecl>());
}
@@ -608,6 +609,14 @@ void ASTStmtReader::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
E->setTypeSourceInfo(Record.readTypeSourceInfo());
}
+void ASTStmtReader::VisitUnresolvedSYCLKernelCallStmt(
+ UnresolvedSYCLKernelCallStmt *S) {
+ VisitStmt(S);
+ // Read the children in the order ASTStmtWriter::VisitUnresolvedSYCLKernelCallStmt adds them: the original compound statement, then the kernel launch id expression.
+ S->setOriginalStmt(cast<CompoundStmt>(Record.readSubStmt()));
+ S->setKernelLaunchIdExpr(Record.readExpr());
+}
+
void ASTStmtReader::VisitPredefinedExpr(PredefinedExpr *E) {
VisitExpr(E);
bool HasFunctionName = Record.readInt();
@@ -3212,6 +3221,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = SYCLUniqueStableNameExpr::CreateEmpty(Context);
break;
+ case STMT_UNRESOLVED_SYCL_KERNEL_CALL:
+ S = UnresolvedSYCLKernelCallStmt::CreateEmpty(Context);
+ break;
+
case EXPR_OPENACC_ASTERISK_SIZE:
S = OpenACCAsteriskSizeExpr::CreateEmpty(Context);
break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 4fcac4d0261a..d9b95e53f2da 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -637,6 +637,7 @@ void ASTStmtWriter::VisitCapturedStmt(CapturedStmt *S) {
void ASTStmtWriter::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *S) {
VisitStmt(S);
Record.AddStmt(S->getOriginalStmt());
+ Record.AddStmt(S->getKernelLaunchStmt());
Record.AddDeclRef(S->getOutlinedFunctionDecl());
Code = serialization::STMT_SYCLKERNELCALL;
@@ -695,6 +696,16 @@ void ASTStmtWriter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
Code = serialization::EXPR_SYCL_UNIQUE_STABLE_NAME;
}
+void ASTStmtWriter::VisitUnresolvedSYCLKernelCallStmt(
+ UnresolvedSYCLKernelCallStmt *S) {
+ VisitStmt(S);
+ // Emit the children in the order ASTStmtReader::VisitUnresolvedSYCLKernelCallStmt reads them: the original statement, then the kernel launch id expression.
+ Record.AddStmt(S->getOriginalStmt());
+ Record.AddStmt(S->getKernelLaunchIdExpr());
+ // Record code that ASTReader::ReadStmtFromStream matches to create the empty UnresolvedSYCLKernelCallStmt.
+ Code = serialization::STMT_UNRESOLVED_SYCL_KERNEL_CALL;
+}
+
void ASTStmtWriter::VisitPredefinedExpr(PredefinedExpr *E) {
VisitExpr(E);
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 644d57cc6b0d..bc8e9040444c 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1825,6 +1825,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
case Stmt::CapturedStmtClass:
case Stmt::SYCLKernelCallStmtClass:
+ case Stmt::UnresolvedSYCLKernelCallStmtClass:
case Stmt::OpenACCComputeConstructClass:
case Stmt::OpenACCLoopConstructClass:
case Stmt::OpenACCCombinedConstructClass:
diff --git a/clang/test/AST/HLSL/Texture2D-AST.hlsl b/clang/test/AST/HLSL/Texture2D-scalar-AST.hlsl
index abdf0a8b35ab..8725bcc05882 100644
--- a/clang/test/AST/HLSL/Texture2D-AST.hlsl
+++ b/clang/test/AST/HLSL/Texture2D-scalar-AST.hlsl
@@ -415,7 +415,289 @@
// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
// CHECK-NEXT: AlwaysInlineAttr
-Texture2D<float4> t;
+// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpRed 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpGreen 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpBlue 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpAlpha 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+Texture2D<float> t;
SamplerState s;
SamplerComparisonState scs;
@@ -436,4 +718,5 @@ void main(float2 loc, float cmp) {
t.SampleCmp(scs, loc, cmp, int2(1, 2), 1.0f);
t.SampleCmpLevelZero(scs, loc, cmp);
t.SampleCmpLevelZero(scs, loc, cmp, int2(1, 2));
+ t.Gather(s, loc);
}
diff --git a/clang/test/AST/HLSL/Texture2D-vector-AST.hlsl b/clang/test/AST/HLSL/Texture2D-vector-AST.hlsl
new file mode 100644
index 000000000000..4e1c41f05232
--- /dev/null
+++ b/clang/test/AST/HLSL/Texture2D-vector-AST.hlsl
@@ -0,0 +1,726 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -disable-llvm-passes -finclude-default-header -o - %s | FileCheck %s
+
+// CHECK: CXXRecordDecl {{.*}} SamplerState definition
+// CHECK: FinalAttr {{.*}} Implicit final
+// CHECK-NEXT: FieldDecl {{.*}} implicit {{.*}} __handle '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+
+// CHECK: CXXRecordDecl {{.*}} SamplerComparisonState definition
+// CHECK: FinalAttr {{.*}} Implicit final
+// CHECK-NEXT: FieldDecl {{.*}} implicit {{.*}} __handle '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+
+// CHECK: ClassTemplateDecl {{.*}} Texture2D
+// CHECK: TemplateTypeParmDecl {{.*}} element_type
+// CHECK: CXXRecordDecl {{.*}} Texture2D
+// CHECK: FinalAttr {{.*}} Implicit final
+// CHECK: ClassTemplatePartialSpecializationDecl {{.*}} Texture2D definition explicit_specialization
+// CHECK: TemplateArgument type 'vector<element_type, element_count>':'vector<type-parameter-0-0, element_count>'
+// CHECK: TemplateTypeParmDecl {{.*}} element_type
+// CHECK: NonTypeTemplateParmDecl {{.*}} element_count
+// CHECK-NEXT: FieldDecl {{.*}} implicit __handle '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+
+// CHECK: CXXMethodDecl {{.*}} Sample 'vector<element_type (hlsl::SamplerState, vector<float, 2>), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} Sample 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} Sample 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>, float), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleBias 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Bias 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_bias' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Bias' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleBias 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float, vector<int, 2>), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Bias 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_bias' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Bias' 'float'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleBias 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float, vector<int, 2>, float), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Bias 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_bias' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Bias' 'float'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleGrad 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<float, 2>, vector<float, 2>), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} DDX 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} DDY 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_grad' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDX' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDY' 'vector<float, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleGrad 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<float, 2>, vector<float, 2>, vector<int, 2>), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} DDX 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} DDY 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_grad' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDX' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDY' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleGrad 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<float, 2>, vector<float, 2>, vector<int, 2>, float), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} DDX 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} DDY 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_grad' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDX' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'DDY' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleLevel 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} LOD 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_level' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'LOD' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleLevel 'vector<element_type (hlsl::SamplerState, vector<float, 2>, float, vector<int, 2>), element_count>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} LOD 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, element_count>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_level' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'LOD' 'float'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleCmp 'float (hlsl::SamplerComparisonState, vector<float, 2>, float)'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleCmp 'float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>)'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleCmp 'float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>, float)'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Clamp 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'Clamp' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleCmpLevelZero 'float (hlsl::SamplerComparisonState, vector<float, 2>, float)'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp_level_zero' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} SampleCmpLevelZero 'float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>)'
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'float' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_sample_cmp_level_zero' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<element_type, element_count>)]]
+// CHECK-SAME{LITERAL}: [[hlsl::resource_dimension(2D)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<vector<element_type, element_count>>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME: ' lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} Gather 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherRed 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherGreen 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherBlue 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherAlpha 'vector<element_type (hlsl::SamplerState, vector<float, 2>, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<element_type, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmp 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpRed 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 0
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpGreen 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpBlue 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 2
+// CHECK-NEXT: AlwaysInlineAttr
+
+// CHECK: CXXMethodDecl {{.*}} GatherCmpAlpha 'vector<float (hlsl::SamplerComparisonState, vector<float, 2>, float, vector<int, 2>), 4>' inline
+// CHECK-NEXT: ParmVarDecl {{.*}} Sampler 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: ParmVarDecl {{.*}} Location 'vector<float, 2>'
+// CHECK-NEXT: ParmVarDecl {{.*}} CompareValue 'float'
+// CHECK-NEXT: ParmVarDecl {{.*}} Offset 'vector<int, 2>'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: ReturnStmt
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'vector<float, 4>' <Dependent>
+// CHECK-NEXT: CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: DeclRefExpr {{.*}} '<builtin fn type>' Function {{.*}} '__builtin_hlsl_resource_gather_cmp' 'void (...) noexcept'
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::Texture2D<{{.*}}>' lvalue implicit this
+// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl::SamplerComparisonState' lvalue ParmVar {{.*}} 'Sampler' 'hlsl::SamplerComparisonState'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<float, 2>' lvalue ParmVar {{.*}} 'Location' 'vector<float, 2>'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'CompareValue' 'float'
+// CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 3
+// CHECK-NEXT: DeclRefExpr {{.*}} 'vector<int, 2>' lvalue ParmVar {{.*}} 'Offset' 'vector<int, 2>'
+// CHECK-NEXT: AlwaysInlineAttr
+
+Texture2D<float4> t;
+SamplerState s;
+SamplerComparisonState scs;
+
+void main(float2 loc, float cmp) { // calls Texture2D<float4> methods on the globals t, s and scs; every result is discarded
+ t.Sample(s, loc); // Sample: sampler + location
+ t.Sample(s, loc, int2(1, 2)); // Sample: adds an int2 argument
+ t.Sample(s, loc, int2(1, 2), 1.0); // Sample: adds a trailing float argument
+ t.SampleBias(s, loc, 0.0); // SampleBias: extra float after the location
+ t.SampleBias(s, loc, 0.0, int2(1, 2)); // SampleBias: adds an int2 argument
+ t.SampleBias(s, loc, 0.0, int2(1, 2), 1.0); // SampleBias: adds a trailing float argument
+ t.SampleGrad(s, loc, float2(0,0), float2(0,0)); // SampleGrad: two float2 arguments after the location
+ t.SampleGrad(s, loc, float2(0,0), float2(0,0), int2(1, 2)); // SampleGrad: adds an int2 argument
+ t.SampleGrad(s, loc, float2(0,0), float2(0,0), int2(1, 2), 1.0); // SampleGrad: adds a trailing float argument
+ t.SampleLevel(s, loc, 0.0); // SampleLevel: extra float after the location
+ t.SampleLevel(s, loc, 0.0, int2(1, 2)); // SampleLevel: adds an int2 argument
+ t.SampleCmp(scs, loc, cmp); // SampleCmp: takes the comparison sampler scs and the compare value cmp
+ t.SampleCmp(scs, loc, cmp, int2(1, 2)); // SampleCmp: adds an int2 argument
+ t.SampleCmp(scs, loc, cmp, int2(1, 2), 1.0f); // SampleCmp: adds a trailing float argument
+ t.SampleCmpLevelZero(scs, loc, cmp); // SampleCmpLevelZero: comparison sampler + location + compare value
+ t.SampleCmpLevelZero(scs, loc, cmp, int2(1, 2)); // SampleCmpLevelZero: adds an int2 argument
+ t.Gather(s, loc); // NOTE(review): only plain Gather is called; the GatherRed/Green/Blue/Alpha and GatherCmp* expectations above match dependent-typed declarations, presumably the class template pattern -- confirm
+}
diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
index e3ff3dea1951..c5518d903844 100644
--- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
+++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
@@ -34,6 +34,8 @@ template<int> struct K {
void operator()(Ts...) const {}
};
+template<typename KernelName, typename... Ts> // KernelName appears in no parameter, so it cannot be deduced from the call arguments
+void sycl_kernel_launch(const char *, Ts...) {} // no-op body; the expected dumps in this file show calls to it taking a kernel-name string plus the entry point's arguments
[[clang::sycl_kernel_entry_point(KN<1>)]]
void skep1() {
@@ -41,6 +43,12 @@ void skep1() {
// CHECK: |-FunctionDecl {{.*}} skep1 'void ()'
// CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}}
// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *)' lvalue Function {{.*}} 'sycl_kernel_launch' {{.*}}
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi1EE"
// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | `-CompoundStmt {{.*}}
// CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<1>
@@ -57,9 +65,10 @@ void skep2<KN<2>>(K<2>);
// CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KT
// CHECK-NEXT: | |-FunctionDecl {{.*}} skep2 'void (KT)'
// CHECK-NEXT: | | |-ParmVarDecl {{.*}} k 'KT'
-// CHECK-NEXT: | | |-CompoundStmt {{.*}}
-// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>'
-// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT'
+// CHECK-NEXT: | | |-UnresolvedSYCLKernelCallStmt {{.*}}
+// CHECK-NEXT: | | | `-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT'
// CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} KNT
// CHECK-NEXT: | `-FunctionDecl {{.*}} skep2 'void (K<2>)' explicit_instantiation_definition instantiated_from 0x{{.+}}
@@ -77,6 +86,15 @@ void skep2<KN<2>>(K<2>);
// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const'
// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<2>' lvalue <NoOp>
// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<2>' lvalue ParmVar {{.*}} 'k' 'K<2>'
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<2>)' lvalue Function {{.*}} 'sycl_kernel_launch' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi2EE"
+// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'K<2>' 'void (K<2> &&) noexcept'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'K<2>' xvalue <NoOp>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<2>' lvalue ParmVar {{.*}} 'k' 'K<2>'
// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<2>'
// CHECK-NEXT: | | `-CompoundStmt {{.*}}
@@ -102,9 +120,10 @@ void skep3<KN<3>>(K<3> k) {
// CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KT
// CHECK-NEXT: | |-FunctionDecl {{.*}} skep3 'void (KT)'
// CHECK-NEXT: | | |-ParmVarDecl {{.*}} k 'KT'
-// CHECK-NEXT: | | |-CompoundStmt {{.*}}
-// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>'
-// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT'
+// CHECK-NEXT: | | |-UnresolvedSYCLKernelCallStmt {{.*}}
+// CHECK-NEXT: | | | `-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT'
// CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} KNT
// CHECK-NEXT: | `-Function {{.*}} 'skep3' 'void (K<3>)'
@@ -123,6 +142,15 @@ void skep3<KN<3>>(K<3> k) {
// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const'
// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<3>' lvalue <NoOp>
// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<3>' lvalue ParmVar {{.*}} 'k' 'K<3>'
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, K<3>)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<3>)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, K<3>)' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi3EE"
+// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'K<3>' 'void (K<3> &&) noexcept'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'K<3>' xvalue <NoOp>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<3>' lvalue ParmVar {{.*}} 'k' 'K<3>'
// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<3>'
// CHECK-NEXT: | | `-CompoundStmt {{.*}}
@@ -152,6 +180,21 @@ void skep4(K<4> k, int p1, int p2) {
// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p1' 'int'
// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p2' 'int'
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, K<4>, int, int)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<4>, int, int)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, K<4>, int, int)' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi4EE"
+// CHECK-NEXT: | | | |-CXXConstructExpr {{.*}} 'K<4>' 'void (K<4> &&) noexcept'
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'K<4>' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'K<4>' lvalue ParmVar {{.*}} 'k' 'K<4>'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p1' 'int'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p2' 'int'
// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<4>'
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used p1 'int'
@@ -182,7 +225,28 @@ void skep5(int unused1, K<5> k, int unused2, int p, int unused3) {
// CHECK-NEXT: | |-ParmVarDecl {{.*}} unused3 'int'
// CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}}
// CHECK-NEXT: | | |-CompoundStmt {{.*}}
-// CHECK: | | `-OutlinedFunctionDecl {{.*}}
+// CHECK: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, int, K<5>, int, int, int)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, int, K<5>, int, int, int)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, int, K<5>, int, int, int)' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi5EE"
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused1' 'int'
+// CHECK-NEXT: | | | |-CXXConstructExpr {{.*}} 'K<5>' 'void (K<5> &&) noexcept'
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'K<5>' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'K<5>' lvalue ParmVar {{.*}} 'k' 'K<5>'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused2' 'int'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p' 'int'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused3' 'int'
+// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit unused1 'int'
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<5>'
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit unused2 'int'
@@ -227,6 +291,14 @@ void skep6(const S6 &k) {
// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)() const' <FunctionToPointerDecay>
// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const'
// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &'
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S6)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S6)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, S6)' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi6EE"
+// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S6' 'void (const S6 &) noexcept'
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &'
// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'const S6 &'
// CHECK-NEXT: | | `-CompoundStmt {{.*}}
@@ -260,6 +332,15 @@ void skep7(S7 k) {
// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const'
// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const S7' lvalue <NoOp>
// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S7' lvalue ParmVar {{.*}} 'k' 'S7'
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S7)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S7)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, S7)' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi7EE"
+// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S7' 'void (S7 &&) noexcept'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'S7' xvalue <NoOp>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S7' lvalue ParmVar {{.*}} 'k' 'S7'
// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'S7'
// CHECK-NEXT: | | `-CompoundStmt {{.*}}
@@ -270,6 +351,114 @@ void skep7(S7 k) {
// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'S7' lvalue ImplicitParam {{.*}} 'k' 'S7'
// CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<7>
+// Symbol names generated for the kernel entry point function should be
+// representable in the ordinary literal encoding even when the kernel name
+// type is spelled with non-ASCII characters (here, Greek letters given as UCNs).
+struct \u03b4\u03c4\u03c7; // Delta Tau Chi (δτχ)
+struct S8 { // kernel object type passed to skep8
+ void operator()() const; // declared only; no definition is given in the lines shown here
+};
+[[clang::sycl_kernel_entry_point(\u03b4\u03c4\u03c7)]] // kernel name type is written with universal-character-names
+void skep8(S8 k) { // entry point whose expected kernel-name string below contains the octal-escaped bytes of that name
+ k(); // the whole body is a single call of the kernel object
+}
+// CHECK: |-FunctionDecl {{.*}} skep8 'void (S8)'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} used k 'S8'
+// CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}}
+// CHECK-NEXT: | | |-CompoundStmt {{.*}}
+// CHECK: | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S8)' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S8)' lvalue Function {{.*}} 'sycl_kernel_launch' 'void (const char *, S8)' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[12]' lvalue "_ZTS6\316\264\317\204\317\207"
+// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S8' 'void (S8 &&) noexcept'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'S8' xvalue <NoOp>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S8' lvalue ParmVar {{.*}} 'k' 'S8'
+// CHECK: | | `-OutlinedFunctionDecl {{.*}}
+// CHECK: | `-SYCLKernelEntryPointAttr {{.*}}
+
+class Handler { // hosts both the launch function and the entry point as member templates
+ template <typename KNT, typename... Ts> // declared before 'public:', so this member is private
+ void sycl_kernel_launch(const char *, Ts...) {} // no-op body; the expected dump below shows it called through an implicit 'this'
+public:
+ template<typename KNT, typename KT> // KNT: kernel name type, KT: kernel object type
+ [[clang::sycl_kernel_entry_point(KNT)]] // the kernel name is the dependent type KNT
+ void skep9(KT k, int a, int b) { // member-template entry point taking two extra int arguments
+ k(a, b); // forwards both ints to the kernel object
+ }
+};
+void foo() { // triggers the implicit instantiation of Handler::skep9 that the expectations below describe
+ Handler H; // object the member entry point is called on
+ H.skep9<KN<9>>([=] (int a, int b) { return a+b; }, 1, 2); // kernel name KN<9>; the kernel is a lambda returning the sum of its two ints
+}
+
+// CHECK: | |-FunctionTemplateDecl {{.*}} skep9
+// CHECK-NEXT: | | |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 KNT
+// CHECK-NEXT: | | |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 1 KT
+// CHECK-NEXT: | | |-CXXMethodDecl {{.*}} skep9 'void (KT, int, int)' implicit-inline
+// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced k 'KT'
+// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced a 'int'
+// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced b 'int'
+// CHECK-NEXT: | | | |-UnresolvedSYCLKernelCallStmt {{.*}}
+// CHECK-NEXT: | | | | `-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | | `-CallExpr {{.*}} '<dependent type>'
+// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT'
+// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int'
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int'
+// CHECK-NEXT: | | | `-SYCLKernelEntryPointAttr {{.*}} KNT
+// CHECK-NEXT: | | `-CXXMethodDecl {{.*}} used skep9 {{.*}} implicit_instantiation implicit-inline instantiated_from 0x{{.*}}
+// CHECK-NEXT: | | |-TemplateArgument type 'KN<9>'
+// CHECK-NEXT: | | | `-RecordType {{.*}} 'KN<9>' canonical
+// CHECK-NEXT: | | | `-ClassTemplateSpecialization {{.*}}'KN'
+// CHECK-NEXT: | | |-TemplateArgument type {{.*}}
+// CHECK-NEXT: | | | `-RecordType {{.*}}
+// CHECK-NEXT: | | | `-CXXRecord {{.*}}
+// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used k {{.*}}
+// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used a 'int'
+// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used b 'int'
+// CHECK-NEXT: | | |-SYCLKernelCallStmt {{.*}}
+// CHECK-NEXT: | | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | | `-CXXOperatorCallExpr {{.*}} 'int' '()'
+// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int (*)(int, int) const' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int (int, int) const' lvalue CXXMethod {{.*}} 'operator()' 'int (int, int) const'
+// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} lvalue <NoOp>
+// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} lvalue ParmVar {{.*}} 'k' {{.*}}
+// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int'
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int'
+// CHECK-NEXT: | | | |-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | | `-CXXMemberCallExpr {{.*}} 'void'
+// CHECK-NEXT: | | | | |-MemberExpr {{.*}} '<bound member function type>' ->sycl_kernel_launch {{.*}}
+// CHECK-NEXT: | | | | | `-CXXThisExpr {{.*}} 'Handler *' implicit this
+// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'const char *' <ArrayToPointerDecay>
+// CHECK-NEXT: | | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi9EE"
+// CHECK-NEXT: | | | | |-CXXConstructExpr {{.*}}
+// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} xvalue <NoOp>
+// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} lvalue ParmVar {{.*}} 'k' {{.*}}
+// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int'
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' xvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int'
+// CHECK-NEXT: | | | `-OutlinedFunctionDecl {{.*}}
+// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used k {{.*}}
+// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used a 'int'
+// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used b 'int'
+// CHECK-NEXT: | | | `-CompoundStmt {{.*}}
+// CHECK-NEXT: | | | `-CXXOperatorCallExpr {{.*}} 'int' '()'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int (*)(int, int) const' <FunctionToPointerDecay>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int (int, int) const' lvalue CXXMethod {{.*}} 'operator()' 'int (int, int) const'
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} lvalue <NoOp>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} lvalue ImplicitParam {{.*}} 'k' {{.*}}
+// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ImplicitParam {{.*}} 'a' 'int'
+// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ImplicitParam {{.*}} 'b' 'int'
+// CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} struct KN<9>
+
void the_end() {}
// CHECK: `-FunctionDecl {{.*}} the_end 'void ()'
diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
index 0171f72df0b3..011f48e91c29 100644
--- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
+++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
@@ -28,6 +28,9 @@
// A unique kernel name type is required for each declared kernel entry point.
template<int, int=0> struct KN;
+template<typename KernelName, typename... Ts>
+void sycl_kernel_launch(const char *, Ts... Args) {} // no-op stub: takes a const char * plus an arbitrary argument pack and does nothing
+
[[clang::sycl_kernel_entry_point(KN<1>)]]
void skep1() {
}
diff --git a/clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp b/clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp
new file mode 100644
index 000000000000..5adaa367ed9c
--- /dev/null
+++ b/clang/test/ASTSYCL/ast-print-sycl-kernel-call.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -fsycl-is-host -ast-print %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsycl-is-device -ast-print %s -o - | FileCheck %s
+
+struct sycl_kernel_launcher { // test fixture: bundles the launch function and the attributed entry point template
+ template<typename KernelName, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...) {} // no-op member: accepts a const char * plus any argument pack
+
+ template<typename KernelName, typename KernelType>
+ [[clang::sycl_kernel_entry_point(KernelName)]]
+ void sycl_kernel_entry_point(KernelType kernel) { // the expected output below requires this body to be printed as written
+ kernel();
+ }
+};
+// CHECK: template <typename KernelName, typename KernelType> void sycl_kernel_entry_point(KernelType kernel)
+// CHECK-NEXT: {
+// CHECK-NEXT: kernel();
+// CHECK-NEXT: }
+// CHECK: template<> void sycl_kernel_entry_point<KN, (lambda at {{.*}})>((lambda at {{.*}}) kernel)
+// CHECK-NEXT: {
+// CHECK-NEXT: kernel();
+// CHECK-NEXT: }
+
+void f(sycl_kernel_launcher skl) { // instantiates the entry point with kernel name KN and a lambda, producing the specialization expected above
+ skl.sycl_kernel_entry_point<struct KN>([]{});
+}
diff --git a/clang/test/CIR/CodeGen/new-delete.cpp b/clang/test/CIR/CodeGen/new-delete.cpp
new file mode 100644
index 000000000000..58db8f8646f4
--- /dev/null
+++ b/clang/test/CIR/CodeGen/new-delete.cpp
@@ -0,0 +1,164 @@
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -triple=x86_64-linux-gnu -fclangir -emit-cir -std=c++98 -fcxx-exceptions -fexceptions -o %t.cir
+// RUN: FileCheck -check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -triple=x86_64-linux-gnu -fclangir -emit-llvm -std=c++98 -fcxx-exceptions -fexceptions -o %t-cir.ll
+// RUN: FileCheck -check-prefixes=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -triple=x86_64-linux-gnu -emit-llvm -std=c++98 -fcxx-exceptions -fexceptions -o %t.ll
+// RUN: FileCheck -check-prefixes=OGCG --input-file=%t.ll %s
+
+
+struct A { A(int); ~A(); void *p; }; // one pointer member; the checks below expect an 8-byte operator new request for it
+
+A *a() { // new-expression with a constant ctor argument; the checks expect operator delete on the unwind path of the ctor call
+ return new A(5);
+}
+
+// CIR: cir.func {{.*}} @_Z1av() -> !cir.ptr<!rec_A> {
+// CIR: %[[RETVAL:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__retval"]
+// CIR: %[[NEW_RESULT:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__new_result"]
+// CIR: %[[ALLOC_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CIR: %[[PTR:.*]] = cir.call @_Znwm(%[[ALLOC_SIZE]])
+// CIR: cir.cleanup.scope {
+// CIR: %[[PTR_A:.*]] = cir.cast bitcast %[[PTR]] : !cir.ptr<!void> -> !cir.ptr<!rec_A>
+// CIR: cir.store{{.*}} %[[PTR_A]], %[[NEW_RESULT]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CIR: %[[FIVE:.*]] = cir.const #cir.int<5> : !s32i
+// CIR: cir.call @_ZN1AC1Ei(%[[PTR_A]], %[[FIVE]])
+// CIR: cir.yield
+// CIR: } cleanup eh {
+// CIR: cir.call @_ZdlPv(%[[PTR]]) nothrow : (!cir.ptr<!void>) -> ()
+// CIR: cir.yield
+// CIR: }
+
+// LLVM: define {{.*}} ptr @_Z1av() {{.*}} personality ptr @__gxx_personality_v0 {
+// LLVM: %[[RETVAL:.*]] = alloca ptr
+// LLVM: %[[NEW_RESULT:.*]] = alloca ptr
+// LLVM: %[[PTR:.*]] = call ptr @_Znwm(i64 8)
+// LLVM: br label %[[EH_SCOPE:.*]]
+// LLVM: [[EH_SCOPE]]:
+// LLVM: store ptr %[[PTR]], ptr %[[NEW_RESULT]]
+// LLVM: invoke void @_ZN1AC1Ei(ptr %[[PTR]], i32 5)
+// LLVM: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]]
+// LLVM: [[INVOKE_CONT]]:
+// LLVM: br label %[[EH_SCOPE_END:.*]]
+// LLVM: [[UNWIND]]:
+// LLVM: %[[EXN:.*]] = landingpad { ptr, i32 }
+// LLVM: cleanup
+// LLVM: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0
+// LLVM: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1
+// LLVM: br label %[[EH_CLEANUP:.*]]
+// LLVM: [[EH_CLEANUP]]:
+// LLVM: %[[EXN_PTR_PHI:.*]] = phi ptr [ %[[EXN_PTR]], %[[UNWIND]] ]
+// LLVM: %[[TYPEID_PHI:.*]] = phi i32 [ %[[TYPEID]], %[[UNWIND]] ]
+// LLVM: call void @_ZdlPv(ptr %[[PTR]])
+// LLVM: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR_PHI]], 0
+// LLVM: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[TYPEID_PHI]], 1
+// LLVM: resume { ptr, i32 } %[[EXN_INSERT_2]]
+// LLVM: [[EH_SCOPE_END]]:
+// LLVM: %[[LOAD:.*]] = load ptr, ptr %[[NEW_RESULT]]
+// LLVM: store ptr %[[LOAD]], ptr %[[RETVAL]]
+// LLVM: %[[RET:.*]] = load ptr, ptr %[[RETVAL]]
+// LLVM: ret ptr %[[RET]]
+
+// OGCG: define {{.*}} ptr @_Z1av() {{.*}} personality ptr @__gxx_personality_v0 {
+// OGCG: %[[EXN_SLOT:.*]] = alloca ptr
+// OGCG: %[[EHSELECTOR_SLOT:.*]] = alloca i32
+// OGCG: %[[PTR:.*]] = call {{.*}} ptr @_Znwm(i64 8)
+// OGCG: invoke void @_ZN1AC1Ei(ptr {{.*}} %[[PTR]], i32 5)
+// OGCG: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]]
+// OGCG: [[INVOKE_CONT]]:
+// OGCG: ret ptr %[[PTR]]
+// OGCG: [[UNWIND]]:
+// OGCG: %[[EXN:.*]] = landingpad { ptr, i32 }
+// OGCG: cleanup
+// OGCG: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0
+// OGCG: store ptr %[[EXN_PTR]], ptr %[[EXN_SLOT]]
+// OGCG: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1
+// OGCG: store i32 %[[TYPEID]], ptr %[[EHSELECTOR_SLOT]]
+// OGCG: call void @_ZdlPv(ptr %[[PTR]])
+// OGCG: br label %[[EH_RESUME:.*]]
+// OGCG: [[EH_RESUME]]:
+// OGCG: %[[EXN_PTR:.*]] = load ptr, ptr %[[EXN_SLOT]]
+// OGCG: %[[EHSELECTOR:.*]] = load i32, ptr %[[EHSELECTOR_SLOT]]
+// OGCG: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR]], 0
+// OGCG: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[EHSELECTOR]], 1
+// OGCG: resume { ptr, i32 } %[[EXN_INSERT_2]]
+
+A *b() { // ctor argument comes from a call, so the checks expect two invokes (foo, then the ctor) after the allocation. NOTE(review): the checks re-capture UNWIND on the second invoke, so they do not verify that both invokes share one landing pad - confirm whether that is intended
+ extern int foo();
+ return new A(foo());
+}
+
+// CIR: cir.func {{.*}} @_Z1bv() -> !cir.ptr<!rec_A> {
+// CIR: %[[RETVAL:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__retval"]
+// CIR: %[[NEW_RESULT:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__new_result"]
+// CIR: %[[ALLOC_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CIR: %[[PTR:.*]] = cir.call @_Znwm(%[[ALLOC_SIZE]])
+// CIR: cir.cleanup.scope {
+// CIR: %[[PTR_A:.*]] = cir.cast bitcast %[[PTR]] : !cir.ptr<!void> -> !cir.ptr<!rec_A>
+// CIR: cir.store{{.*}} %[[PTR_A]], %[[NEW_RESULT]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CIR: %[[FOO:.*]] = cir.call @_Z3foov() : () -> !s32i
+// CIR: cir.call @_ZN1AC1Ei(%[[PTR_A]], %[[FOO]])
+// CIR: cir.yield
+// CIR: } cleanup eh {
+// CIR: cir.call @_ZdlPv(%[[PTR]]) nothrow : (!cir.ptr<!void>) -> ()
+// CIR: cir.yield
+// CIR: }
+
+// LLVM: define {{.*}} ptr @_Z1bv() {{.*}} personality ptr @__gxx_personality_v0 {
+// LLVM: %[[RETVAL:.*]] = alloca ptr
+// LLVM: %[[NEW_RESULT:.*]] = alloca ptr
+// LLVM: %[[PTR:.*]] = call ptr @_Znwm(i64 8)
+// LLVM: br label %[[EH_SCOPE:.*]]
+// LLVM: [[EH_SCOPE]]:
+// LLVM: store ptr %[[PTR]], ptr %[[NEW_RESULT]]
+// LLVM: %[[FOO:.*]] = invoke i32 @_Z3foov()
+// LLVM: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]]
+// LLVM: [[INVOKE_CONT]]:
+// LLVM: invoke void @_ZN1AC1Ei(ptr %[[PTR]], i32 %[[FOO]])
+// LLVM: to label %[[INVOKE_CONT_2:.*]] unwind label %[[UNWIND:.*]]
+// LLVM: [[INVOKE_CONT_2]]:
+// LLVM: br label %[[EH_SCOPE_END:.*]]
+// LLVM: [[UNWIND]]:
+// LLVM: %[[EXN:.*]] = landingpad { ptr, i32 }
+// LLVM: cleanup
+// LLVM: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0
+// LLVM: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1
+// LLVM: br label %[[EH_CLEANUP:.*]]
+// LLVM: [[EH_CLEANUP]]:
+// LLVM: %[[EXN_PTR_PHI:.*]] = phi ptr [ %[[EXN_PTR]], %[[UNWIND]] ]
+// LLVM: %[[TYPEID_PHI:.*]] = phi i32 [ %[[TYPEID]], %[[UNWIND]] ]
+// LLVM: call void @_ZdlPv(ptr %[[PTR]])
+// LLVM: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR_PHI]], 0
+// LLVM: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[TYPEID_PHI]], 1
+// LLVM: resume { ptr, i32 } %[[EXN_INSERT_2]]
+// LLVM: [[EH_SCOPE_END]]:
+// LLVM: %[[LOAD:.*]] = load ptr, ptr %[[NEW_RESULT]]
+// LLVM: store ptr %[[LOAD]], ptr %[[RETVAL]]
+// LLVM: %[[RET:.*]] = load ptr, ptr %[[RETVAL]]
+// LLVM: ret ptr %[[RET]]
+
+// OGCG: define {{.*}} ptr @_Z1bv() {{.*}} personality ptr @__gxx_personality_v0 {
+// OGCG: %[[EXN_SLOT:.*]] = alloca ptr
+// OGCG: %[[EHSELECTOR_SLOT:.*]] = alloca i32
+// OGCG: %[[PTR:.*]] = call {{.*}} ptr @_Znwm(i64 8)
+// OGCG: %[[FOO:.*]] = invoke i32 @_Z3foov()
+// OGCG: to label %[[INVOKE_CONT:.*]] unwind label %[[UNWIND:.*]]
+// OGCG: [[INVOKE_CONT]]:
+// OGCG: invoke void @_ZN1AC1Ei(ptr {{.*}} %[[PTR]], i32 %[[FOO]])
+// OGCG: to label %[[INVOKE_CONT_2:.*]] unwind label %[[UNWIND:.*]]
+// OGCG: [[INVOKE_CONT_2]]:
+// OGCG: ret ptr %[[PTR]]
+// OGCG: [[UNWIND]]:
+// OGCG: %[[EXN:.*]] = landingpad { ptr, i32 }
+// OGCG: cleanup
+// OGCG: %[[EXN_PTR:.*]] = extractvalue { ptr, i32 } %[[EXN]], 0
+// OGCG: store ptr %[[EXN_PTR]], ptr %[[EXN_SLOT]]
+// OGCG: %[[TYPEID:.*]] = extractvalue { ptr, i32 } %[[EXN]], 1
+// OGCG: store i32 %[[TYPEID]], ptr %[[EHSELECTOR_SLOT]]
+// OGCG: call void @_ZdlPv(ptr %[[PTR]])
+// OGCG: br label %[[EH_RESUME:.*]]
+// OGCG: [[EH_RESUME]]:
+// OGCG: %[[EXN_PTR:.*]] = load ptr, ptr %[[EXN_SLOT]]
+// OGCG: %[[EHSELECTOR:.*]] = load i32, ptr %[[EHSELECTOR_SLOT]]
+// OGCG: %[[EXN_INSERT:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN_PTR]], 0
+// OGCG: %[[EXN_INSERT_2:.*]] = insertvalue { ptr, i32 } %[[EXN_INSERT]], i32 %[[EHSELECTOR]], 1
+// OGCG: resume { ptr, i32 } %[[EXN_INSERT_2]]
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index 6eadaaf27a21..ac2c83aa03cc 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -7,313 +7,8 @@
#include <arm_neon.h>
-// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
-//
-uint8x8_t test_vceqz_s8(int8x8_t a) {
- return vceqz_s8(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_s16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
-//
-uint16x4_t test_vceqz_s16(int16x4_t a) {
- return vceqz_s16(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_s32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
-// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
-//
-uint32x2_t test_vceqz_s32(int32x2_t a) {
- return vceqz_s32(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_s64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_s64(int64x1_t a) {
- return vceqz_s64(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_u64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_u64(uint64x1_t a) {
- return vceqz_u64(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_p64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_p64(poly64x1_t a) {
- return vceqz_p64(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_s8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
-//
-uint8x16_t test_vceqzq_s8(int8x16_t a) {
- return vceqzq_s8(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
-//
-uint16x8_t test_vceqzq_s16(int16x8_t a) {
- return vceqzq_s16(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
-//
-uint32x4_t test_vceqzq_s32(int32x4_t a) {
- return vceqzq_s32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_s64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_s64(int64x2_t a) {
- return vceqzq_s64(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_u8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
-//
-uint8x8_t test_vceqz_u8(uint8x8_t a) {
- return vceqz_u8(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_u16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
-//
-uint16x4_t test_vceqz_u16(uint16x4_t a) {
- return vceqz_u16(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_u32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
-// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
-//
-uint32x2_t test_vceqz_u32(uint32x2_t a) {
- return vceqz_u32(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_u8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
-//
-uint8x16_t test_vceqzq_u8(uint8x16_t a) {
- return vceqzq_u8(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
-//
-uint16x8_t test_vceqzq_u16(uint16x8_t a) {
- return vceqzq_u16(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
-//
-uint32x4_t test_vceqzq_u32(uint32x4_t a) {
- return vceqzq_u32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_u64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_u64(uint64x2_t a) {
- return vceqzq_u64(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
-// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
-//
-uint32x2_t test_vceqz_f32(float32x2_t a) {
- return vceqz_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_f64(float64x1_t a) {
- return vceqz_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
-//
-uint32x4_t test_vceqzq_f32(float32x4_t a) {
- return vceqzq_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_p8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
-//
-uint8x8_t test_vceqz_p8(poly8x8_t a) {
- return vceqz_p8(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_p8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
-//
-uint8x16_t test_vceqzq_p8(poly8x16_t a) {
- return vceqzq_p8(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_f64(float64x2_t a) {
- return vceqzq_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_p64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_p64(poly64x2_t a) {
- return vceqzq_p64(a);
-}
-
// CHECK-LABEL: define dso_local <8 x i8> @test_vcgez_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <8 x i8> [[A]], zeroinitializer
// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index ab424fc08f17..77b6c09de857 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -17,10 +17,6 @@
// hence for CIR we use `opt -passes=simplifycfg` to reduce the control flow
// and to make LLVM IR match for all paths.
//
-// Minor differences between RUN lines (e.g., the presence of `noundef` on
-// arguments or the `align` attribute on pointers) are matched using
-// catch-alls such as `{{.*}}`.
-//
// TODO: Remove `-simplifycfg` once CIR lowering includes the relevant
// optimizations to reduce the CFG.
//
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 2d476ad3028b..a711245b3372 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -7,9 +7,8 @@
//=============================================================================
// NOTES
//
-// Minor differences between RUNs (e.g. presence of `noundef` attached to
-// argumens, `align` attribute attached to pointers), are matched using
-// catch-alls like {{.*}}.
+// ACLE section headings based on v2025Q2 of the ACLE specification:
+// * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#bitwise-equal-to-zero
//
// Different labels for CIR stem from an additional function call that is
// present at the AST and CIR levels, but is inlined at the LLVM IR level.
@@ -17,6 +16,20 @@
#include <arm_neon.h>
+// LLVM-LABEL: @test_vnegd_s64
+// CIR-LABEL: @vnegd_s64
+int64_t test_vnegd_s64(int64_t a) { // scalar negate: expected as a unary minus in CIR and as (0 - a) in LLVM IR
+// CIR: cir.unary(minus, {{.*}}) : !s64
+
+// LLVM-SAME: i64 {{.*}} [[A:%.*]])
+// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]]
+// LLVM-NEXT: ret i64 [[VNEGD_I]]
+ return (int64_t)vnegd_s64(a);
+}
+
+//===------------------------------------------------------===//
+// 2.1.2.2 Bitwise equal to zero
+//===------------------------------------------------------===//
// LLVM-LABEL: @test_vceqzd_s64
// CIR-LABEL: @vceqzd_s64
uint64_t test_vceqzd_s64(int64_t a) {
@@ -32,15 +45,363 @@ uint64_t test_vceqzd_s64(int64_t a) {
return (uint64_t)vceqzd_s64(a);
}
-// LLVM-LABEL: @test_vnegd_s64
-// CIR-LABEL: @vnegd_s64
-int64_t test_vnegd_s64(int64_t a) {
-// CIR: cir.unary(minus, {{.*}}) : !s64
+// LLVM-LABEL: @test_vceqz_s8(
+// CIR-LABEL: @vceqz_s8(
+uint8x8_t test_vceqz_s8(int8x8_t a) { // 8-bit lanes: compare against zero directly on the argument (no bitcast), then sign-extend the mask; first capture of ATTR0
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]]
+ return vceqz_s8(a);
+}
-// LLVM-SAME: i64{{.*}} [[A:%.*]])
-// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]]
-// LLVM-NEXT: ret i64 [[VNEGD_I]]
- return (int64_t)vnegd_s64(a);
+// LLVM-LABEL: @test_vceqz_s16(
+// CIR-LABEL: @vceqz_s16(
+uint16x4_t test_vceqz_s16(int16x4_t a) { // 16-bit lanes: the argument round-trips through an 8 x i8 vector before the compare against zero
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !s16i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !s16i>, !cir.vector<4 x !s16i>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// LLVM-NEXT: ret <4 x i16> [[VCEQZ_I]]
+ return vceqz_s16(a);
+}
+
+// LLVM-LABEL: @test_vceqz_s32(
+// CIR-LABEL: @vceqz_s32(
+uint32x2_t test_vceqz_s32(int32x2_t a) { // 32-bit lanes: bitcast round-trip through 8 x i8, compare against zero, sign-extend the mask
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !s32i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !s32i>, !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]]
+ return vceqz_s32(a);
+}
+
+// LLVM-LABEL: @test_vceqz_s64(
+// CIR-LABEL: @vceqz_s64(
+uint64x1_t test_vceqz_s64(int64x1_t a) { // single 64-bit lane: bitcast round-trip through 8 x i8, compare against zero, sign-extend the mask
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !s64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !s64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !s64i>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_s64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u64(
+// CIR-LABEL: @vceqz_u64(
+uint64x1_t test_vceqz_u64(uint64x1_t a) { // unsigned variant: the CIR compare is expected to take an unsigned operand vector and yield a signed result vector
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !u64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !u64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !u64i>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_u64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_p64(
+// CIR-LABEL: @vceqz_p64(
+uint64x1_t test_vceqz_p64(poly64x1_t a) { // poly64 variant: the CIR checks expect the operand as a signed 64-bit vector, same shape as the s64 case
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !s64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !s64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !s64i>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_p64(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s8(
+// CIR-LABEL: @vceqzq_s8(
+uint8x16_t test_vceqzq_s8(int8x16_t a) { // 128-bit, 8-bit lanes: compare against zero directly on the argument (no bitcast), then sign-extend the mask
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]]
+ return vceqzq_s8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s16(
+// CIR-LABEL: @vceqzq_s16(
+uint16x8_t test_vceqzq_s16(int16x8_t a) { // NOTE(review): only the function label is checked for CIR here; there are no CIR body checks, unlike test_vceqz_s16 - confirm this is intentional
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// LLVM-NEXT: ret <8 x i16> [[VCEQZ_I]]
+ return vceqzq_s16(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s32(
+// CIR-LABEL: @vceqzq_s32(
+uint32x4_t test_vceqzq_s32(int32x4_t a) { // NOTE(review): only the function label is checked for CIR here; there are no CIR body checks, unlike test_vceqz_s32 - confirm this is intentional
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]]
+ return vceqzq_s32(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s64(
+// CIR-LABEL: @vceqzq_s64(
+uint64x2_t test_vceqzq_s64(int64x2_t a) { // NOTE(review): only the function label is checked for CIR here; there are no CIR body checks, unlike test_vceqz_s64 - confirm this is intentional
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_s64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u8(
+// CIR-LABEL: @vceqz_u8(
+uint8x8_t test_vceqz_u8(uint8x8_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !u8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !u8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]]
+ return vceqz_u8(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u16(
+// CIR-LABEL: @vceqz_u16(
+uint16x4_t test_vceqz_u16(uint16x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !u16i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !u16i>, !cir.vector<4 x !s16i>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// LLVM-NEXT: ret <4 x i16> [[VCEQZ_I]]
+ return vceqz_u16(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u32(
+// CIR-LABEL: @vceqz_u32(
+uint32x2_t test_vceqz_u32(uint32x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !u32i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !u32i>, !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]]
+ return vceqz_u32(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u8(
+// CIR-LABEL: @vceqzq_u8(
+uint8x16_t test_vceqzq_u8(uint8x16_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !u8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !u8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]]
+ return vceqzq_u8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u16(
+// CIR-LABEL: @vceqzq_u16(
+uint16x8_t test_vceqzq_u16(uint16x8_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !u16i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// LLVM-NEXT: ret <8 x i16> [[VCEQZ_I]]
+ return vceqzq_u16(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u32(
+// CIR-LABEL: @vceqzq_u32(
+uint32x4_t test_vceqzq_u32(uint32x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !u32i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
+
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]]
+ return vceqzq_u32(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u64(
+// CIR-LABEL: @vceqzq_u64(
+uint64x2_t test_vceqzq_u64(uint64x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !u64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !u64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_u64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_f32(
+// CIR-LABEL: @vceqz_f32(
+uint32x2_t test_vceqz_f32(float32x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.float>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !cir.float>, !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]]
+ return vceqz_f32(a);
+}
+
+// LLVM-LABEL: @test_vceqz_f64(
+// CIR-LABEL: @vceqz_f64(
+uint64x1_t test_vceqz_f64(float64x1_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !cir.double>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !cir.double>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !cir.double>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_f64(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_f32(
+// CIR-LABEL: @vceqzq_f32(
+uint32x4_t test_vceqzq_f32(float32x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.float>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !cir.float>, !cir.vector<4 x !s32i>
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]]
+ return vceqzq_f32(a);
+}
+
+// LLVM-LABEL: @test_vceqz_p8(
+// CIR-LABEL: @vceqz_p8(
+uint8x8_t test_vceqz_p8(poly8x8_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]]
+ return vceqz_p8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_p8(
+// CIR-LABEL: @vceqzq_p8(
+uint8x16_t test_vceqzq_p8(poly8x16_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]]
+ return vceqzq_p8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_f64(
+// CIR-LABEL: @vceqzq_f64(
+uint64x2_t test_vceqzq_f64(float64x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.double>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !cir.double>, !cir.vector<2 x !s64i>
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_f64(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_p64(
+// CIR-LABEL: @vceqzq_p64(
+uint64x2_t test_vceqzq_p64(poly64x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_p64(a);
}
//===------------------------------------------------------===//
@@ -51,7 +412,7 @@ int64_t test_vnegd_s64(int64_t a) {
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
-// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]])
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]])
// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
// LLVM-NEXT: ret <8 x i8> [[VABD_I]]
return vabd_s8(v1, v2);
@@ -64,7 +425,7 @@ int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
-// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]])
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -81,7 +442,7 @@ int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
-// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]])
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -96,7 +457,7 @@ int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
-// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]])
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]])
// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
// LLVM-NEXT: ret <8 x i8> [[VABD_I]]
return vabd_u8(v1, v2);
@@ -109,7 +470,7 @@ uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
-// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]])
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -126,7 +487,7 @@ uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
-// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]])
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -143,7 +504,7 @@ float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
-// LLVM-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]])
+// LLVM-SAME: <2 x float> {{.*}} [[V1:%.*]], <2 x float> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
@@ -162,7 +523,7 @@ float64x1_t test_vabd_f64(float64x1_t v1, float64x1_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !cir.double>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
-// LLVM-SAME: <1 x double> noundef [[V1:%.*]], <1 x double> noundef [[V2:%.*]])
+// LLVM-SAME: <1 x double> {{.*}} [[V1:%.*]], <1 x double> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V1]] to i64
// LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V2]] to i64
@@ -181,7 +542,7 @@ float64x1_t test_vabd_f64(float64x1_t v1, float64x1_t v2) {
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
-// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]])
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]])
// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
// LLVM-NEXT: ret <16 x i8> [[VABD_I]]
return vabdq_s8(v1, v2);
@@ -194,7 +555,7 @@ int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
-// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]])
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -211,7 +572,7 @@ int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
-// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]])
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -226,7 +587,7 @@ int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !u8i>, !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
-// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]])
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]])
// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
// LLVM-NEXT: ret <16 x i8> [[VABD_I]]
return vabdq_u8(v1, v2);
@@ -239,7 +600,7 @@ uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
-// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]])
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -256,7 +617,7 @@ uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
-// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]])
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -273,7 +634,7 @@ float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
-// LLVM-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]])
+// LLVM-SAME: <4 x float> {{.*}} [[V1:%.*]], <4 x float> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
// LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
@@ -292,7 +653,7 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double>
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
-// LLVM-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]])
+// LLVM-SAME: <2 x double> {{.*}} [[V1:%.*]], <2 x double> {{.*}} [[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
@@ -321,7 +682,7 @@ uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabd_u8
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]])
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]], <8 x i8> {{.*}} [[V3:%.*]])
// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
// LLVM-NEXT: ret <8 x i8> [[ADD_I]]
@@ -334,7 +695,7 @@ uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabd_u16
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]])
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]], <4 x i16> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -351,7 +712,7 @@ uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabd_u32
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]])
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]], <2 x i32> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -368,7 +729,7 @@ int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabd_s8
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]])
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]], <8 x i8> {{.*}} [[V3:%.*]])
// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
// LLVM-NEXT: ret <8 x i8> [[ADD_I]]
@@ -381,7 +742,7 @@ int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabd_s16
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]])
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]], <4 x i16> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -398,7 +759,7 @@ int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabd_s32
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]])
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]], <2 x i32> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -415,7 +776,7 @@ int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabdq_s8
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]])
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]])
// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
// LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
// LLVM-NEXT: ret <16 x i8> [[ADD_I]]
@@ -428,7 +789,7 @@ int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabdq_s16
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]])
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -445,7 +806,7 @@ int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabdq_s32
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]])
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -462,7 +823,7 @@ uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabdq_u8
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]])
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]])
// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
// LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
// LLVM-NEXT: ret <16 x i8> [[ADD_I]]
@@ -475,7 +836,7 @@ uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabdq_u16
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]])
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -492,7 +853,7 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
// CIR: [[ABD:%.*]] = cir.call @vabdq_u32
// CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
-// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]])
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c
index c8bc7d0f0456..9b7011f36f52 100644
--- a/clang/test/CodeGen/amdgpu-abi-version.c
+++ b/clang/test/CodeGen/amdgpu-abi-version.c
@@ -1,29 +1,48 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
-// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
+// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck -check-prefixes=CHECK,LLVM %s
+// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa-llvm -emit-llvm -mcode-object-version=none %s -o - | FileCheck -check-prefixes=CHECK,LLVMENV %s
//.
-// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
+// LLVM: @__oclc_ABI_version = external addrspace(4) global i32
//.
-// CHECK-LABEL: define dso_local i32 @foo(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
-// CHECK-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
-// CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
-// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
-// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]]
-// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
-// CHECK-NEXT: ret i32 [[CONV]]
+// LLVM-LABEL: define dso_local i32 @foo(
+// LLVM-SAME: ) #[[ATTR0:[0-9]+]] {
+// LLVM-NEXT: [[ENTRY:.*:]]
+// LLVM-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
+// LLVM-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
+// LLVM-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LLVM-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
+// LLVM-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LLVM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
+// LLVM-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
+// LLVM-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]]
+// LLVM-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
+// LLVM-NEXT: ret i32 [[CONV]]
+//
+// LLVMENV-LABEL: define dso_local i32 @foo(
+// LLVMENV-SAME: ) #[[ATTR0:[0-9]+]] {
+// LLVMENV-NEXT: [[ENTRY:.*:]]
+// LLVMENV-NEXT: [[TMP0:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LLVMENV-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12
+// LLVMENV-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]]
+// LLVMENV-NEXT: [[CONV:%.*]] = zext i16 [[TMP2]] to i32
+// LLVMENV-NEXT: ret i32 [[CONV]]
//
int foo() { return __builtin_amdgcn_workgroup_size_x(); }
//.
-// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+// LLVM: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// LLVM: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+//.
+// LLVMENV: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// LLVMENV: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+//.
+// LLVM: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// LLVM: [[RNG1]] = !{i16 1, i16 1025}
+// LLVM: [[META2]] = !{}
//.
-// CHECK: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// CHECK: [[RNG1]] = !{i16 1, i16 1025}
-// CHECK: [[META2]] = !{}
+// LLVMENV: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// LLVMENV: [[RNG1]] = !{i16 1, i16 1025}
+// LLVMENV: [[META2]] = !{}
//.
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl b/clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl
new file mode 100644
index 000000000000..54d428285d88
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/Texture2D-Gather.hlsl
@@ -0,0 +1,183 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -emit-llvm -disable-llvm-passes -finclude-default-header -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL
+// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -finclude-default-header -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPIRV
+
+// DXIL: %"class.hlsl::Texture2D" = type { target("dx.Texture", <4 x float>, 0, 0, 0, 2) }
+// DXIL: %"class.hlsl::SamplerState" = type { target("dx.Sampler", 0) }
+// DXIL: %"class.hlsl::SamplerComparisonState" = type { target("dx.Sampler", 0) }
+
+// SPIRV: %"class.hlsl::Texture2D" = type { target("spirv.Image", float, 1, 2, 0, 0, 1, 0) }
+// SPIRV: %"class.hlsl::SamplerState" = type { target("spirv.Sampler") }
+// SPIRV: %"class.hlsl::SamplerComparisonState" = type { target("spirv.Sampler") }
+
+Texture2D<float4> t;
+SamplerState s;
+SamplerComparisonState sc;
+
+// CHECK: define hidden {{.*}} <4 x float> @main(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}})
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 main(float2 loc : LOC) : SV_Target {
+ return t.Gather(s, loc);
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer)
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer)
+// CHECK: ret <4 x float> %[[RES]]
+
+// CHECK: define hidden {{.*}} <4 x float> @test_offset(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2], int vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}}, <2 x i32> {{.*}} <i32 1, i32 2>)
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 test_offset(float2 loc : LOC) : SV_Target {
+ return t.Gather(s, loc, int2(1, 2));
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::Gather(hlsl::SamplerState, float vector[2], int vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^,]+]], <2 x i32> {{.*}} %[[OFFSET:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: %[[OFFSET_ADDR:.*]] = alloca <2 x i32>
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: store <2 x i32> %[[OFFSET]], ptr %[[OFFSET_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// CHECK: %[[OFFSET_VAL:.*]] = load <2 x i32>, ptr %[[OFFSET_ADDR]]
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> %[[OFFSET_VAL]])
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> %[[OFFSET_VAL]])
+// CHECK: ret <4 x float> %[[RES]]
+
+// CHECK: define hidden {{.*}} <4 x float> @test_green(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherGreen(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}})
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 test_green(float2 loc : LOC) : SV_Target {
+ return t.GatherGreen(s, loc);
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherGreen(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 1, <2 x i32> zeroinitializer)
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 1, <2 x i32> zeroinitializer)
+// CHECK: ret <4 x float> %[[RES]]
+
+// CHECK: define hidden {{.*}} <4 x float> @test_red(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherRed(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}})
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 test_red(float2 loc : LOC) : SV_Target {
+ return t.GatherRed(s, loc);
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherRed(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer)
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 0, <2 x i32> zeroinitializer)
+// CHECK: ret <4 x float> %[[RES]]
+
+// CHECK: define hidden {{.*}} <4 x float> @test_blue(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherBlue(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}})
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 test_blue(float2 loc : LOC) : SV_Target {
+ return t.GatherBlue(s, loc);
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherBlue(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 2, <2 x i32> zeroinitializer)
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 2, <2 x i32> zeroinitializer)
+// CHECK: ret <4 x float> %[[RES]]
+
+// CHECK: define hidden {{.*}} <4 x float> @test_alpha(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherAlpha(hlsl::SamplerState, float vector[2])(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}}, <2 x float> {{.*}} %{{.*}})
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 test_alpha(float2 loc : LOC) : SV_Target {
+ return t.GatherAlpha(s, loc);
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherAlpha(hlsl::SamplerState, float vector[2])(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 3, <2 x i32> zeroinitializer)
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], i32 3, <2 x i32> zeroinitializer)
+// CHECK: ret <4 x float> %[[RES]]
+
+// CHECK: define hidden {{.*}} <4 x float> @test_cmp(float vector[2])(<2 x float> noundef nofpclass(nan inf) %[[LOC:.*]])
+// CHECK: %[[CALL:.*]] = call {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherCmp(hlsl::SamplerComparisonState, float vector[2], float)(ptr {{.*}} @t, ptr {{.*}} byval(%"class.hlsl::SamplerComparisonState") {{.*}}, <2 x float> {{.*}} %{{.*}}, float {{.*}} 5.000000e-01)
+// CHECK: ret <4 x float> %[[CALL]]
+
+float4 test_cmp(float2 loc : LOC) : SV_Target {
+ return t.GatherCmp(sc, loc, 0.5);
+}
+
+// CHECK: define linkonce_odr hidden {{.*}} <4 x float> @hlsl::Texture2D<float vector[4]>::GatherCmp(hlsl::SamplerComparisonState, float vector[2], float)(ptr {{.*}} %[[THIS:[^,]+]], ptr {{.*}} byval(%"class.hlsl::SamplerComparisonState") {{.*}} %[[SAMPLER:[^,]+]], <2 x float> {{.*}} %[[COORD:[^,]+]], float {{.*}} %[[CMP:[^)]+]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca ptr
+// CHECK: %[[COORD_ADDR:.*]] = alloca <2 x float>
+// CHECK: %[[CMP_ADDR:.*]] = alloca float
+// CHECK: store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// CHECK: store <2 x float> %[[COORD]], ptr %[[COORD_ADDR]]
+// CHECK: store float %[[CMP]], ptr %[[CMP_ADDR]]
+// CHECK: %[[THIS_VAL:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// CHECK: %[[HANDLE_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::Texture2D", ptr %[[THIS_VAL]], i32 0, i32 0
+// CHECK: %[[HANDLE:.*]] = load target{{.*}}, ptr %[[HANDLE_GEP]]
+// CHECK: %[[SAMPLER_GEP:.*]] = getelementptr inbounds nuw %"class.hlsl::SamplerComparisonState", ptr %[[SAMPLER]], i32 0, i32 0
+// CHECK: %[[SAMPLER_H:.*]] = load target{{.*}}, ptr %[[SAMPLER_GEP]]
+// CHECK: %[[COORD_VAL:.*]] = load <2 x float>, ptr %[[COORD_ADDR]]
+// CHECK: %[[CMP_VAL:.*]] = load float, ptr %[[CMP_ADDR]]
+// CHECK: %[[CONV:.*]] = fpext {{.*}} float %[[CMP_VAL]] to double
+// CHECK: %[[TRUNC:.*]] = fptrunc {{.*}} double %[[CONV]] to float
+// DXIL: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.dx.resource.gather.cmp.v4f32.tdx.Texture_v4f32_0_0_0_2t.tdx.Sampler_0t.v2f32.v2i32(target("dx.Texture", <4 x float>, 0, 0, 0, 2) %[[HANDLE]], target("dx.Sampler", 0) %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], float %[[TRUNC]], i32 0, <2 x i32> zeroinitializer)
+// SPIRV: %[[RES:.*]] = call {{.*}} <4 x float> @llvm.spv.resource.gather.cmp.v4f32.tspirv.Image_f32_1_2_0_0_1_0t.tspirv.Samplert.v2f32.v2i32(target("spirv.Image", float, 1, 2, 0, 0, 1, 0) %[[HANDLE]], target("spirv.Sampler") %[[SAMPLER_H]], <2 x float> %[[COORD_VAL]], float %[[TRUNC]], <2 x i32> zeroinitializer)
+// CHECK: ret <4 x float> %[[RES]]
diff --git a/clang/test/CodeGenSYCL/function-attrs.cpp b/clang/test/CodeGenSYCL/function-attrs.cpp
index 4c55cf34aa0a..60d3cf10055e 100644
--- a/clang/test/CodeGenSYCL/function-attrs.cpp
+++ b/clang/test/CodeGenSYCL/function-attrs.cpp
@@ -26,6 +26,9 @@ int foo() {
return 1;
}
+template <typename Name, typename... Ts>
+void sycl_kernel_launch(Ts...) {}
+
template <typename Name, typename Func>
[[clang::sycl_kernel_entry_point(Name)]] void kernel_single_task(const Func &kernelFunc) {
kernelFunc();
diff --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
index 67b53f3ae81c..47c2c45ae774 100644
--- a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
+++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
@@ -2,31 +2,36 @@
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-unknown-linux-gnu -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s
// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-pc-windows-msvc -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-pc-windows-msvc -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s
// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64-uefi -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-HOST,CHECK-HOST-WINDOWS %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple amdgcn-amd-amdhsa -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-AMDGCN %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple nvptx64-nvidia-cuda -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-NVPTX %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR %s
-
-// Test the generation of SYCL kernel caller functions. These functions are
-// generated from functions declared with the sycl_kernel_entry_point attribute
-// and emited during device compilation. They are not emitted during device
-// compilation.
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spir64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRNV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv32-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s
+// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -aux-triple x86_64-uefi -triple spirv64-unknown-unknown -std=c++17 %s -o - | FileCheck --check-prefixes=CHECK-DEVICE,CHECK-SPIR,CHECK-SPIRV %s
+
+// Test code generation for functions declared with the sycl_kernel_entry_point
+// attribute. During host compilation, the bodies of such functions are replaced
+// with calls to a function template or variable template (with a suitable
+// call operator) named sycl_kernel_launch. During device compilation, the
+// bodies of these functions are used to generate offload kernel entry points
+// (SYCL kernel caller functions).
+
+template <typename KernelName, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
struct single_purpose_kernel_name;
struct single_purpose_kernel {
@@ -44,57 +49,169 @@ void kernel_single_task(KernelType kernelFunc) {
kernelFunc(42);
}
+// Exercise code gen with kernel name types named with esoteric characters.
+struct \u03b4\u03c4\u03c7; // Delta Tau Chi (δτχ)
+
+class handler {
+ template <typename KernelName, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...) {}
+public:
+ template <typename KernelName, typename KernelType>
+ [[clang::sycl_kernel_entry_point(KernelName)]]
+ void kernel_entry_point(KernelType k, int a, int b) {
+ k(a, b);
+ }
+};
+
+struct copyable {
+ int i;
+ ~copyable();
+};
+
int main() {
single_purpose_kernel obj;
single_purpose_kernel_task(obj);
int capture;
auto lambda = [=](auto) { (void) capture; };
kernel_single_task<decltype(lambda)>(lambda);
+ kernel_single_task<\u03b4\u03c4\u03c7>([](int){});
+ handler h;
+ copyable c{42};
+ h.kernel_entry_point<struct KN>([=] (int a, int b) { return c.i + a + b; }, 1, 2);
}
// Verify that SYCL kernel caller functions are not emitted during host
// compilation.
//
-// CHECK-HOST-NOT: _ZTS26single_purpose_kernel_name
-// CHECK-HOST-NOT: _ZTSZ4mainE18lambda_kernel_name
+// CHECK-HOST-NOT: define {{.*}} @_ZTS26single_purpose_kernel_name
+// CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainEUlT_E_
+// CHECK-HOST-NOT: define {{.*}} @"_ZTS6\CE\B4\CF\84\CF\87"
+// CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainE2KN
// Verify that sycl_kernel_entry_point attributed functions are not emitted
// during device compilation.
//
// CHECK-DEVICE-NOT: single_purpose_kernel_task
// CHECK-DEVICE-NOT: kernel_single_task
+// CHECK-DEVICE-NOT: kernel_entry_point
-// Verify that no code is generated for the bodies of sycl_kernel_entry_point
-// attributed functions during host compilation. ODR-use of these functions may
-// require them to be emitted, but they have no effect if called.
+// Verify that kernel launch code is generated for sycl_kernel_entry_point
+// attributed functions during host compilation.
+//
+// CHECK-HOST-LINUX: @.str = private unnamed_addr constant [33 x i8] c"_ZTS26single_purpose_kernel_name\00", align 1
+// CHECK-HOST-LINUX: @.str.1 = private unnamed_addr constant [18 x i8] c"_ZTSZ4mainEUlT_E_\00", align 1
+// CHECK-HOST-LINUX: @.str.2 = private unnamed_addr constant [12 x i8] c"_ZTS6\CE\B4\CF\84\CF\87\00", align 1
//
// CHECK-HOST-LINUX: define dso_local void @_Z26single_purpose_kernel_task21single_purpose_kernel() #{{[0-9]+}} {
// CHECK-HOST-LINUX-NEXT: entry:
// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1
+// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %struct.single_purpose_kernel, align 1
+// CHECK-HOST-LINUX-NEXT: call void @_Z18sycl_kernel_launchI26single_purpose_kernel_nameJ21single_purpose_kernelEEvPKcDpT0_(ptr noundef @.str)
// CHECK-HOST-LINUX-NEXT: ret void
// CHECK-HOST-LINUX-NEXT: }
//
// CHECK-HOST-LINUX: define internal void @_Z18kernel_single_taskIZ4mainEUlT_E_S1_EvT0_(i32 %kernelFunc.coerce) #{{[0-9]+}} {
// CHECK-HOST-LINUX-NEXT: entry:
// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon, align 4
+// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon, align 4
// CHECK-HOST-LINUX-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0
// CHECK-HOST-LINUX-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4
+// CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %kernelFunc, i64 4, i1 false)
+// CHECK-HOST-LINUX-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-LINUX-NEXT: %0 = load i32, ptr %coerce.dive1, align 4
+// CHECK-HOST-LINUX-NEXT: call void @_Z18sycl_kernel_launchIZ4mainEUlT_E_JS1_EEvPKcDpT0_(ptr noundef @.str.1, i32 %0)
// CHECK-HOST-LINUX-NEXT: ret void
// CHECK-HOST-LINUX-NEXT: }
//
+// CHECK-HOST-LINUX: define internal void @"_Z18kernel_single_taskI6\CE\B4\CF\84\CF\87Z4mainEUliE_EvT0_"() #{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon.0, align 1
+// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.0, align 1
+// CHECK-HOST-LINUX-NEXT: call void @"_Z18sycl_kernel_launchI6\CE\B4\CF\84\CF\87JZ4mainEUliE_EEvPKcDpT0_"(ptr noundef @.str.2)
+// CHECK-HOST-LINUX-NEXT: ret void
+// CHECK-HOST-LINUX-NEXT: }
+
+
+// CHECK-HOST-LINUX: define internal void @_ZN7handler18kernel_entry_pointIZ4mainE2KNZ4mainEUliiE_EEvT0_ii(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %k, i32 noundef %a, i32 noundef %b) #{{[0-9]+}} align 2 {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT: %this.addr = alloca ptr, align 8
+// CHECK-HOST-LINUX-NEXT: %k.indirect_addr = alloca ptr, align 8
+// CHECK-HOST-LINUX-NEXT: %a.addr = alloca i32, align 4
+// CHECK-HOST-LINUX-NEXT: %b.addr = alloca i32, align 4
+// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.1, align 4
+// CHECK-HOST-LINUX-NEXT: store ptr %this, ptr %this.addr, align 8
+// CHECK-HOST-LINUX-NEXT: store ptr %k, ptr %k.indirect_addr, align 8
+// CHECK-HOST-LINUX-NEXT: store i32 %a, ptr %a.addr, align 4
+// CHECK-HOST-LINUX-NEXT: store i32 %b, ptr %b.addr, align 4
+// CHECK-HOST-LINUX-NEXT: %this1 = load ptr, ptr %this.addr, align 8
+// CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %k, i64 4, i1 false)
+// CHECK-HOST-LINUX-NEXT: %0 = load i32, ptr %a.addr, align 4
+// CHECK-HOST-LINUX-NEXT: %1 = load i32, ptr %b.addr, align 4
+// CHECK-HOST-LINUX-NEXT: call void @_ZN7handler18sycl_kernel_launchIZ4mainE2KNJZ4mainEUliiE_iiEEEvPKcDpT0_(ptr noundef nonnull align 1 dereferenceable(1) %this1, ptr noundef @.str.3, ptr noundef %agg.tmp, i32 noundef %0, i32 noundef %1)
+// CHECK-HOST-LINUX-NEXT: call void @_ZZ4mainENUliiE_D1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) %agg.tmp) #{{[0-9]+}}
+// CHECK-HOST-LINUX-NEXT: ret void
+// CHECK-HOST-LINUX-NEXT: }
+
// CHECK-HOST-WINDOWS: define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} {
// CHECK-HOST-WINDOWS-NEXT: entry:
// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1
+// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %struct.single_purpose_kernel, align 1
// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %kernelFunc, i32 0, i32 0
// CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: %0 = load i8, ptr %coerce.dive1, align 1
+// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@Usingle_purpose_kernel_name@@Usingle_purpose_kernel@@@@YAXPEBDUsingle_purpose_kernel@@@Z"(ptr noundef @"??_C@_0CB@KFIJOMLB@_ZTS26single_purpose_kernel_name@", i8 %0)
// CHECK-HOST-WINDOWS-NEXT: ret void
// CHECK-HOST-WINDOWS-NEXT: }
//
// CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@V<lambda_1>@?0??main@@9@V1?0??2@9@@@YAXV<lambda_1>@?0??main@@9@@Z"(i32 %kernelFunc.coerce) #{{[0-9]+}} {
// CHECK-HOST-WINDOWS-NEXT: entry:
// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon, align 4
+// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon, align 4
// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0
// CHECK-HOST-WINDOWS-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4
+// CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %kernelFunc, i64 4, i1 false)
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %coerce.dive1, align 4
+// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@V<lambda_1>@?0??main@@9@V1?0??2@9@@@YAXPEBDV<lambda_1>@?0??main@@9@@Z"(ptr noundef @"??_C@_0BC@NHCDOLAA@_ZTSZ4mainEUlT_E_?$AA@", i32 %0)
+//
+// CHECK-HOST-WINDOWS-NEXT: ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+//
+// CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@U\CE\B4\CF\84\CF\87@@V<lambda_2>@?0??main@@9@@@YAXV<lambda_2>@?0??main@@9@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon.0, align 1
+// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.0, align 1
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.0, ptr %kernelFunc, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon.0, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: %0 = load i8, ptr %coerce.dive1, align 1
+// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@U\CE\B4\CF\84\CF\87@@V<lambda_2>@?0??main@@9@@@YAXPEBDV<lambda_2>@?0??main@@9@@Z"(ptr noundef @"??_C@_0M@BCGAEMBE@_ZTS6?N?$LE?O?$IE?O?$IH?$AA@", i8 %0)
+// CHECK-HOST-WINDOWS-NEXT: ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+
+// CHECK-HOST-WINDOWS: define internal void @"??$kernel_entry_point@UKN@?1??main@@9@V<lambda_3>@?0??2@9@@handler@@QEAAXV<lambda_3>@?0??main@@9@HH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this, i32 %k.coerce, i32 noundef %a, i32 noundef %b) #{{[0-9]+}} align 2
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT: %k = alloca %class.anon.1, align 4
+// CHECK-HOST-WINDOWS-NEXT: %b.addr = alloca i32, align 4
+// CHECK-HOST-WINDOWS-NEXT: %a.addr = alloca i32, align 4
+// CHECK-HOST-WINDOWS-NEXT: %this.addr = alloca ptr, align 8
+// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.1, align 4
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.1, ptr %k, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.copyable, ptr %coerce.dive, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: store i32 %k.coerce, ptr %coerce.dive1, align 4
+// CHECK-HOST-WINDOWS-NEXT: store i32 %b, ptr %b.addr, align 4
+// CHECK-HOST-WINDOWS-NEXT: store i32 %a, ptr %a.addr, align 4
+// CHECK-HOST-WINDOWS-NEXT: store ptr %this, ptr %this.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT: %this2 = load ptr, ptr %this.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %b.addr, align 4
+// CHECK-HOST-WINDOWS-NEXT: %1 = load i32, ptr %a.addr, align 4
+// CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %k, i64 4, i1 false)
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive3 = getelementptr inbounds nuw %class.anon.1, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: %coerce.dive4 = getelementptr inbounds nuw %struct.copyable, ptr %coerce.dive3, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT: %2 = load i32, ptr %coerce.dive4, align 4
+// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@UKN@?1??main@@9@V<lambda_3>@?0??2@9@HH@handler@@AEAAXPEBDV<lambda_3>@?0??main@@9@HH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this2, ptr noundef @"??_C@_0P@DLGHPODL@_ZTSZ4mainE2KN?$AA@", i32 %2, i32 noundef %1, i32 noundef %0)
+// CHECK-HOST-WINDOWS-NEXT: call void @"??1<lambda_3>@?0??main@@9@QEAA@XZ"(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) %k) #{{[0-9]+}}
// CHECK-HOST-WINDOWS-NEXT: ret void
// CHECK-HOST-WINDOWS-NEXT: }
@@ -179,6 +296,122 @@ int main() {
// CHECK-SPIR-NEXT: }
// CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// IR for the SYCL kernel caller function generated for kernel_single_task with
+// the Delta Tau Chi type as the SYCL kernel name type.
+//
+// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @"_ZTS6\CE\B4\CF\84\CF\87"
+// CHECK-AMDGCN-SAME: (ptr addrspace(4) noundef byref(%class.anon.0) align 1 %0) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT: %coerce = alloca %class.anon.0, align 1, addrspace(5)
+// CHECK-AMDGCN-NEXT: %kernelFunc = addrspacecast ptr addrspace(5) %coerce to ptr
+// CHECK-AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 1 %kernelFunc, ptr addrspace(4) align 1 %0, i64 1, i1 false)
+// CHECK-AMDGCN-NEXT: call void @_ZZ4mainENKUliE_clEi
+// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc, i32 noundef 42) #[[AMDGCN_ATTR1:[0-9]+]]
+// CHECK-AMDGCN-NEXT: ret void
+// CHECK-AMDGCN-NEXT: }
+// CHECK-AMDGCN: define internal void @_ZZ4mainENKUliE_clEi
+//
+// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @"_ZTS6\CE\B4\CF\84\CF\87"
+// CHECK-NVPTX-SAME: (ptr noundef byval(%class.anon.0) align 1 %kernelFunc) #[[NVPTX_ATTR0:[0-9]+]] {
+// CHECK-NVPTX-NEXT: entry:
+// CHECK-NVPTX-NEXT: call void @_ZZ4mainENKUliE_clEi
+// CHECK-NVPTX-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc, i32 noundef 42) #[[NVPTX_ATTR1:[0-9]+]]
+// CHECK-NVPTX-NEXT: ret void
+// CHECK-NVPTX-NEXT: }
+// CHECK-NVPTX: define internal void @_ZZ4mainENKUliE_clEi
+//
+// CHECK-SPIR: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-SPIR-NEXT: define {{[a-z_ ]*}}spir_kernel void @"_ZTS6\CE\B4\CF\84\CF\87"
+// CHECK-SPIR-SAME: (ptr noundef byval(%class.anon.0) align 1 %kernelFunc) #[[SPIR_ATTR0:[0-9]+]] {
+// CHECK-SPIR-NEXT: entry:
+// CHECK-SPIR-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4)
+// CHECK-SPIR-NEXT: call spir_func void @_ZZ4mainENKUliE_clEi
+// CHECK-SPIR-SAME: (ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %kernelFunc.ascast, i32 noundef 42) #[[SPIR_ATTR1:[0-9]+]]
+// CHECK-SPIR-NEXT: ret void
+// CHECK-SPIR-NEXT: }
+// CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUliE_clEi
+
+// IR for the SYCL kernel caller function generated for
+// handler::kernel_entry_point with main::KN as the SYCL kernel name type.
+//
+// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @_ZTSZ4mainE2KN
+// CHECK-AMDGCN-SAME: (i32 %k.coerce, i32 noundef %a, i32 noundef %b) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT: %k = alloca %class.anon.1, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT: %a.addr = alloca i32, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT: %b.addr = alloca i32, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT: %k2 = addrspacecast ptr addrspace(5) %k to ptr
+// CHECK-AMDGCN-NEXT: %a.addr.ascast = addrspacecast ptr addrspace(5) %a.addr to ptr
+// CHECK-AMDGCN-NEXT: %b.addr.ascast = addrspacecast ptr addrspace(5) %b.addr to ptr
+// CHECK-AMDGCN-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.1, ptr %k2, i32 0, i32 0
+// CHECK-AMDGCN-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.copyable, ptr %coerce.dive, i32 0, i32 0
+// CHECK-AMDGCN-NEXT: store i32 %k.coerce, ptr %coerce.dive1, align 4
+// CHECK-AMDGCN-NEXT: store i32 %a, ptr %a.addr.ascast, align 4
+// CHECK-AMDGCN-NEXT: store i32 %b, ptr %b.addr.ascast, align 4
+// CHECK-AMDGCN-NEXT: %0 = load i32, ptr %a.addr.ascast, align 4
+// CHECK-AMDGCN-NEXT: %1 = load i32, ptr %b.addr.ascast, align 4
+// CHECK-AMDGCN-NEXT: %call = call noundef i32 @_ZZ4mainENKUliiE_clEii
+// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 4 dereferenceable(4) %k2, i32 noundef %0, i32 noundef %1) #[[AMDGCN_ATTR1:[0-9]+]]
+// CHECK-AMDGCN-NEXT: ret void
+// CHECK-AMDGCN-NEXT: }
+//
+// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @_ZTSZ4mainE2KN
+// CHECK-NVPTX-SAME: (ptr noundef byval(%class.anon.1) align 4 %k, i32 noundef %a, i32 noundef %b) #[[NVPTX_ATTR0:[0-9]+]] {
+// CHECK-NVPTX-NEXT: entry:
+// CHECK-NVPTX-NEXT: %a.addr = alloca i32, align 4
+// CHECK-NVPTX-NEXT: %b.addr = alloca i32, align 4
+// CHECK-NVPTX-NEXT: store i32 %a, ptr %a.addr, align 4
+// CHECK-NVPTX-NEXT: store i32 %b, ptr %b.addr, align 4
+// CHECK-NVPTX-NEXT: %0 = load i32, ptr %a.addr, align 4
+// CHECK-NVPTX-NEXT: %1 = load i32, ptr %b.addr, align 4
+// CHECK-NVPTX-NEXT: %call = call noundef i32 @_ZZ4mainENKUliiE_clEii
+// CHECK-NVPTX-SAME: (ptr noundef nonnull align 4 dereferenceable(4) %k, i32 noundef %0, i32 noundef %1) #[[NVPTX_ATTR1:[0-9]+]]
+// CHECK-NVPTX-NEXT: ret void
+// CHECK-NVPTX-NEXT: }
+//
+// CHECK-SPIRNV: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-SPIRNV-NEXT: define dso_local spir_kernel void @_ZTSZ4mainE2KN
+// CHECK-SPIRNV-SAME: (ptr noundef %k, i32 noundef %a, i32 noundef %b) #[[SPIR_ATTR0:[0-9]+]] {
+// CHECK-SPIRNV-NEXT: entry:
+// CHECK-SPIRNV-NEXT: %k.indirect_addr = alloca ptr addrspace(4), align {{[48]}}
+// CHECK-SPIRNV-NEXT: %a.addr = alloca i32, align 4
+// CHECK-SPIRNV-NEXT: %b.addr = alloca i32, align 4
+// CHECK-SPIRNV-NEXT: %k.indirect_addr.ascast = addrspacecast ptr %k.indirect_addr to ptr addrspace(4)
+// CHECK-SPIRNV-NEXT: %a.addr.ascast = addrspacecast ptr %a.addr to ptr addrspace(4)
+// CHECK-SPIRNV-NEXT: %b.addr.ascast = addrspacecast ptr %b.addr to ptr addrspace(4)
+// CHECK-SPIRNV-NEXT: store ptr %k, ptr addrspace(4) %k.indirect_addr.ascast, align {{[48]}}
+// CHECK-SPIRNV-NEXT: %k.ascast = addrspacecast ptr %k to ptr addrspace(4)
+// CHECK-SPIRNV-NEXT: store i32 %a, ptr addrspace(4) %a.addr.ascast, align 4
+// CHECK-SPIRNV-NEXT: store i32 %b, ptr addrspace(4) %b.addr.ascast, align 4
+// CHECK-SPIRNV-NEXT: %0 = load i32, ptr addrspace(4) %a.addr.ascast, align 4
+// CHECK-SPIRNV-NEXT: %1 = load i32, ptr addrspace(4) %b.addr.ascast, align 4
+// CHECK-SPIRNV-NEXT: %call = call spir_func noundef i32 @_ZZ4mainENKUliiE_clEii
+// CHECK-SPIRNV-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %k.ascast, i32 noundef %0, i32 noundef %1) #[[SPIR_ATTR1:[0-9]+]]
+// CHECK-SPIRNV-NEXT: ret void
+// CHECK-SPIRNV-NEXT: }
+//
+// CHECK-SPIRV: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CHECK-SPIRV-NEXT: define spir_kernel void @_ZTSZ4mainE2KN
+// CHECK-SPIRV-SAME: (ptr noundef byval(%class.anon.1) align 4 %k, i32 noundef %a, i32 noundef %b) #[[SPIR_ATTR0:[0-9]+]] {
+// CHECK-SPIRV-NEXT: entry:
+// CHECK-SPIRV-NEXT: %a.addr = alloca i32, align 4
+// CHECK-SPIRV-NEXT: %b.addr = alloca i32, align 4
+// CHECK-SPIRV-NEXT: %a.addr.ascast = addrspacecast ptr %a.addr to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: %b.addr.ascast = addrspacecast ptr %b.addr to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: %k.ascast = addrspacecast ptr %k to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store i32 %a, ptr addrspace(4) %a.addr.ascast, align 4
+// CHECK-SPIRV-NEXT: store i32 %b, ptr addrspace(4) %b.addr.ascast, align 4
+// CHECK-SPIRV-NEXT: %0 = load i32, ptr addrspace(4) %a.addr.ascast, align 4
+// CHECK-SPIRV-NEXT: %1 = load i32, ptr addrspace(4) %b.addr.ascast, align 4
+// CHECK-SPIRV-NEXT: %call = call spir_func noundef i32 @_ZZ4mainENKUliiE_clEii
+// CHECK-SPIRV-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %k.ascast, i32 noundef %0, i32 noundef %1) #[[SPIR_ATTR1:[0-9]+]]
+// CHECK-SPIRV-NEXT: ret void
+// CHECK-SPIRV-NEXT: }
+
// CHECK-AMDGCN: #[[AMDGCN_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK-AMDGCN: #[[AMDGCN_ATTR1]] = { convergent nounwind }
//
diff --git a/clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp b/clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp
new file mode 100644
index 000000000000..8fe7a148a2f6
--- /dev/null
+++ b/clang/test/CodeGenSYCL/sycl-kernel-entry-point-exceptions.cpp
@@ -0,0 +1,95 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fcxx-exceptions -fexceptions -fsycl-is-host -emit-llvm -o - %s | FileCheck %s
+
+// Validate generation of exception handling code for functions declared
+// with the sycl_kernel_entry_point attribute that implicitly call a
+// sycl_kernel_launch function that may throw an exception. Exception
+// handling is not relevant for the generated offload kernel entry point
+// function, so device compilation is intentionally not exercised.
+
+// A unique kernel name type is required for each declared kernel entry point.
+template<int> struct KN;
+
+// A generic kernel object type.
+template<int, int = 0> // both parameters are unnamed; the second defaults to 0
+struct KT {
+ void operator()() const; // declared only; this file provides no definition
+};
+
+
+// Validate that exception handling instructions are omitted when a
+// potentially throwing sycl_kernel_entry_point attributed function
+// calls a potentially throwing sycl_kernel_launch function (a thrown
+// exception will propagate with no explicit handling required).
+namespace ns1 {
+ template<typename KN, typename... Ts> // this KN is a template parameter hiding the global KN template
+ void sycl_kernel_launch(const char *, Ts...); // no noexcept: potentially throwing
+ [[clang::sycl_kernel_entry_point(KN<1>)]] // KN<1> names the global kernel name template
+ void skep(KT<1> k) { // no noexcept: potentially throwing
+ k();
+ }
+}
+// CHECK: ; Function Attrs: mustprogress noinline optnone
+// CHECK: define dso_local void @_ZN3ns14skepE2KTILi1ELi0EE() #{{[0-9]+}} {
+// CHECK: call void @_ZN3ns118sycl_kernel_launchI2KNILi1EEJ2KTILi1ELi0EEEEEvPKcDpT0_(ptr noundef @.str)
+// CHECK: ret void
+// CHECK: }
+
+
+// Validate that exception handling instructions are emitted when a
+// non-throwing sycl_kernel_entry_point attributed function calls
+// a potentially throwing sycl_kernel_launch function.
+namespace ns2 {
+ template<typename KN, typename... Ts> // this KN is a template parameter hiding the global KN template
+ void sycl_kernel_launch(const char *, Ts...); // no noexcept: potentially throwing
+ [[clang::sycl_kernel_entry_point(KN<2>)]] // KN<2> names the global kernel name template
+ void skep(KT<2> k) noexcept { // noexcept: the checks below expect an invoke that unwinds to a terminate landing pad
+ k();
+ }
+}
+// CHECK: ; Function Attrs: mustprogress noinline nounwind optnone
+// CHECK: define dso_local void @_ZN3ns24skepE2KTILi2ELi0EE() #{{[0-9]+}} personality ptr @__gxx_personality_v0 {
+// CHECK: invoke void @_ZN3ns218sycl_kernel_launchI2KNILi2EEJ2KTILi2ELi0EEEEEvPKcDpT0_(ptr noundef @.str.1)
+// CHECK: to label %invoke.cont unwind label %terminate.lpad
+// CHECK: invoke.cont:
+// CHECK: ret void
+// CHECK: terminate.lpad:
+// CHECK: call void @__clang_call_terminate(ptr %1) #{{[0-9]+}}
+// CHECK: unreachable
+// CHECK: }
+
+
+// Validate that exception handling instructions are omitted when a
+// potentially throwing sycl_kernel_entry_point attributed function
+// calls a non-throwing sycl_kernel_launch function (a thrown
+// exception will terminate within sycl_kernel_launch).
+namespace ns3 {
+ template<typename KN, typename... Ts> // this KN is a template parameter hiding the global KN template
+ void sycl_kernel_launch(const char *, Ts...) noexcept; // noexcept: non-throwing
+ [[clang::sycl_kernel_entry_point(KN<3>)]] // KN<3> names the global kernel name template
+ void skep(KT<3> k) { // no noexcept: potentially throwing
+ k();
+ }
+}
+// CHECK: ; Function Attrs: mustprogress noinline nounwind optnone
+// CHECK: define dso_local void @_ZN3ns34skepE2KTILi3ELi0EE() #{{[0-9]+}} {
+// CHECK: call void @_ZN3ns318sycl_kernel_launchI2KNILi3EEJ2KTILi3ELi0EEEEEvPKcDpT0_(ptr noundef @.str.2)
+// CHECK: ret void
+// CHECK: }
+
+
+// Validate that exception handling instructions are omitted when a
+// non-throwing sycl_kernel_entry_point attributed function calls a
+// non-throwing sycl_kernel_launch function.
+namespace ns4 {
+ template<typename KN, typename... Ts> // this KN is a template parameter hiding the global KN template
+ void sycl_kernel_launch(const char *, Ts...) noexcept; // noexcept: non-throwing
+ [[clang::sycl_kernel_entry_point(KN<4>)]] // KN<4> names the global kernel name template
+ void skep(KT<4> k) noexcept { // noexcept: non-throwing, like the launch function it pairs with
+ k();
+ }
+}
+// CHECK: ; Function Attrs: mustprogress noinline nounwind optnone
+// CHECK: define dso_local void @_ZN3ns44skepE2KTILi4ELi0EE() #{{[0-9]+}} {
+// CHECK: call void @_ZN3ns418sycl_kernel_launchI2KNILi4EEJ2KTILi4ELi0EEEEEvPKcDpT0_(ptr noundef @.str.3)
+// CHECK: ret void
+// CHECK: }
diff --git a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
index 14366a092a1f..c298593e2f1a 100644
--- a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
+++ b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s '-D$ADDRSPACE=addrspace(1) '
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-host -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s '-D$ADDRSPACE='
+template<typename KN, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
template<typename KN, typename Func>
[[clang::sycl_kernel_entry_point(KN)]] void kernel(Func F){
diff --git a/clang/test/Driver/empty_arg.c b/clang/test/Driver/empty_arg.c
new file mode 100644
index 000000000000..94ed8f13cbec
--- /dev/null
+++ b/clang/test/Driver/empty_arg.c
@@ -0,0 +1,2 @@
+// RUN: not %clang -- "" 2>&1 | FileCheck %s
+// CHECK: error: no such file or directory: ''
diff --git a/clang/test/Modules/auto-module-import.m b/clang/test/Modules/auto-module-import.m
index cfbb28fa20e6..578d41bfba50 100644
--- a/clang/test/Modules/auto-module-import.m
+++ b/clang/test/Modules/auto-module-import.m
@@ -96,6 +96,6 @@ namespace NS { // expected-note {{begins here}}
}
extern "C" { // expected-note {{begins here}}
#include <NoUmbrella/A.h> // expected-remark {{treating #include as an import}} \
- expected-error {{import of C++ module 'NoUmbrella.A' appears within extern "C"}}
+ expected-warning {{import of C++ module 'NoUmbrella.A' appears within extern "C"}}
}
#endif
diff --git a/clang/test/Modules/extern_c.cpp b/clang/test/Modules/extern_c.cpp
index cc831bd2a089..aa39880c037a 100644
--- a/clang/test/Modules/extern_c.cpp
+++ b/clang/test/Modules/extern_c.cpp
@@ -42,7 +42,7 @@ extern "C++" {
// expected-error-re@-3 {{import of module '{{c_library.inner|cxx_library}}' appears within namespace 'M'}}
// expected-note@-21 {{namespace 'M' begins here}}
#elif defined(EXTERN_C) && !defined(EXTERN_CXX) && defined(CXX_HEADER) && !defined(NO_EXTERN_C_ERROR)
-// expected-error@-6 {{import of C++ module 'cxx_library' appears within extern "C" language linkage specification}}
+// expected-warning@-6 {{import of C++ module 'cxx_library' appears within extern "C" language linkage specification}}
// expected-note@-20 {{extern "C" language linkage specification begins here}}
#endif
diff --git a/clang/test/SemaHLSL/Texture2D-Gather.hlsl b/clang/test/SemaHLSL/Texture2D-Gather.hlsl
new file mode 100644
index 000000000000..61b3c28a49e7
--- /dev/null
+++ b/clang/test/SemaHLSL/Texture2D-Gather.hlsl
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -fsyntax-only -verify -finclude-default-header %s
+
+Texture2D<float4> Tex;
+SamplerState Samp;
+SamplerComparisonState SampCmp;
+
+void main() {
+ float2 uv = float2(0.5, 0.5);
+ int2 offset = int2(1, 1);
+ float compare = 0.5;
+
+ // Gather
+ // Expected: Gather(SamplerState, float2, [int2])
+ Tex.Gather(Samp, uv);
+ Tex.Gather(Samp, uv, offset);
+
+ // Invalid Overloads
+ Tex.Gather(Samp); // expected-error {{no matching member function for call to 'Gather'}}
+ Tex.Gather(Samp, uv, offset, 1); // expected-error {{no matching member function for call to 'Gather'}}
+
+ // Gather variants
+ Tex.GatherRed(Samp, uv);
+ Tex.GatherGreen(Samp, uv, offset);
+ Tex.GatherBlue(Samp, uv);
+ Tex.GatherAlpha(Samp, uv, offset);
+
+ // GatherCmp
+ // Expected: GatherCmp(SamplerComparisonState, float2, float, [int2])
+ Tex.GatherCmp(SampCmp, uv, compare);
+ Tex.GatherCmp(SampCmp, uv, compare, offset);
+
+ // Invalid Overloads
+ Tex.GatherCmp(SampCmp, uv); // expected-error {{no matching member function for call to 'GatherCmp'}}
+ Tex.GatherCmp(SampCmp, uv, compare, offset, 1); // expected-error {{no matching member function for call to 'GatherCmp'}}
+
+ // GatherCmp variants
+ Tex.GatherCmpRed(SampCmp, uv, compare);
+ Tex.GatherCmpGreen(SampCmp, uv, compare);
+ Tex.GatherCmpBlue(SampCmp, uv, compare, offset);
+ Tex.GatherCmpAlpha(SampCmp, uv, compare);
+
+ // Type checks
+ // Offset must be int2 (SamplerState fails)
+ Tex.Gather(Samp, uv, Samp); // expected-error {{no matching member function for call to 'Gather'}}
+
+ // Compare value must be scalar float
+ Tex.GatherCmp(SampCmp, uv, Samp); // expected-error {{no matching member function for call to 'GatherCmp'}}
+}
+
+// expected-note@* 0+{{candidate function not viable}}
diff --git a/clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl b/clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl
new file mode 100644
index 000000000000..117686b4ef9c
--- /dev/null
+++ b/clang/test/SemaHLSL/Texture2D-GatherCmp-Vulkan.hlsl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -fsyntax-only -verify -finclude-default-header %s
+
+Texture2D<float4> Tex;
+SamplerComparisonState SampCmp;
+
+void main() {
+ float2 uv = float2(0.5, 0.5);
+ float compare = 0.5;
+
+ Tex.GatherCmp(SampCmp, uv, compare);
+ Tex.GatherCmpRed(SampCmp, uv, compare);
+
+ // expected-error@* {{gatherCmpGreen operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed}}
+ Tex.GatherCmpGreen(SampCmp, uv, compare);
+
+ // expected-error@* {{gatherCmpBlue operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed}}
+ Tex.GatherCmpBlue(SampCmp, uv, compare);
+
+ // expected-error@* {{gatherCmpAlpha operations on the Vulkan target are not supported; only GatherCmp and GatherCmpRed are allowed}}
+ Tex.GatherCmpAlpha(SampCmp, uv, compare);
+}
+
+// expected-note@* 0+{{in instantiation of member function}}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
index 9aba284145fc..45da8c71348b 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
@@ -1,6 +1,9 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify %s
// These tests validate appertainment for the sycl_kernel_entry_point attribute.
@@ -37,6 +40,9 @@ struct coroutine_traits {
// A unique kernel name type is required for each declared kernel entry point.
template<int, int = 0> struct KN;
+// A generic kernel launch function.
+template<typename KNT, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
////////////////////////////////////////////////////////////////////////////////
// Valid declarations.
@@ -131,6 +137,16 @@ struct S15 {
static T ok15();
};
+struct S16 {
+ // Non-static member function declaration.
+ [[clang::sycl_kernel_entry_point(KN<16>)]]
+ void ok16();
+};
+
+#if __cplusplus >= 202302L
+auto ok17 = [] [[clang::sycl_kernel_entry_point(KN<17>)]] -> void {};
+#endif
+
////////////////////////////////////////////////////////////////////////////////
// Invalid declarations.
@@ -163,13 +179,6 @@ struct B2 {
static int bad2;
};
-struct B3 {
- // Non-static member function declaration.
- // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
- [[clang::sycl_kernel_entry_point(BADKN<3>)]]
- void bad3();
-};
-
// expected-error@+1 {{'clang::sycl_kernel_entry_point' attribute only applies to functions}}
namespace [[clang::sycl_kernel_entry_point(BADKN<4>)]] bad4 {}
@@ -244,13 +253,13 @@ void bad19() {
#endif
struct B20 {
- // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+ // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a constructor}}
[[clang::sycl_kernel_entry_point(BADKN<20>)]]
B20();
};
struct B21 {
- // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+ // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a destructor}}
[[clang::sycl_kernel_entry_point(BADKN<21>)]]
~B21();
};
@@ -338,11 +347,6 @@ struct B34 {
};
#if __cplusplus >= 202302L
-// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
-auto bad35 = [] [[clang::sycl_kernel_entry_point(BADKN<35>)]] -> void {};
-#endif
-
-#if __cplusplus >= 202302L
// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}}
auto bad36 = [] [[clang::sycl_kernel_entry_point(BADKN<36>)]] static {};
#endif
@@ -373,3 +377,29 @@ struct B42 {
// expected-warning@+1 {{declaration does not declare anything}}
[[clang::sycl_kernel_entry_point(BADKN<42>)]];
};
+
+#if __cplusplus >= 202302L
+struct B43 {
+ // expected-error@+2 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}}
+ template<typename KNT>
+ [[clang::sycl_kernel_entry_point(KNT)]]
+ void bad43(this B43) {}
+};
+#endif
+
+#if __cplusplus >= 202302L
+struct B44 {
+ // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}}
+ [[clang::sycl_kernel_entry_point(BADKN<44>)]]
+ void bad44(this B44);
+};
+#endif
+
+#if __cplusplus >= 202302L
+template<typename KNT>
+struct B45 {
+ // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}}
+ [[clang::sycl_kernel_entry_point(KNT)]]
+ void bad45(this B45);
+};
+#endif
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp
new file mode 100644
index 000000000000..1aa48c739c04
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-device-odr-use.cpp
@@ -0,0 +1,142 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsycl-is-host -verify=host %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsycl-is-device -verify=device %s
+
+// These tests validate that a diagnostic is issued if a function declared with
+// the sycl_kernel_entry_point attribute is ODR-used from code that is emitted
+// during device compilation. Such uses are ill-formed because such functions
+// are used to define an offload kernel entry point function; they aren't
+// available for ordinary function use.
+
+// host-no-diagnostics
+
+// Emulate inclusion of <typeinfo>.
+namespace std {
+struct type_info {
+ virtual ~type_info();
+};
+} // namespace std
+
+// A generic kernel launch function.
+template<typename KernelName, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
+
+// A kernel name type template.
+template<int> struct KN;
+
+// SYCL kernel entry point functions. These are used to both trigger the
+// emission of a function during device compilation (but not during host
+// compilation) and to trigger a diagnostic if ODR-used from a function
+// emitted during device compilation.
+// device-note@+1 4 {{attribute is here}}
+[[clang::sycl_kernel_entry_point(KN<1>)]]
+void skep();
+struct SKL {
+ // device-note@+1 6 {{attribute is here}}
+ [[clang::sycl_kernel_entry_point(KN<2>)]]
+ void mskep();
+ // device-note@+1 6 {{attribute is here}}
+ [[clang::sycl_kernel_entry_point(KN<3>)]]
+ static void smskep();
+ // device-note@+1 2 {{attribute is here}}
+ [[clang::sycl_kernel_entry_point(KN<4>)]]
+ void operator()() const;
+};
+
+// A function that is emitted on the device due to usage reachable from a
+// SYCL kernel entry point function. ODR-uses of sycl_kernel_entry_point
+// attributed functions within this function require a diagnostic during
+// device compilation.
+void df() {
+ // Not ODR-uses; ok. The operand of decltype is unevaluated.
+ decltype(&skep) p1 = nullptr;
+ decltype(&SKL::mskep) p2 = nullptr;
+ decltype(&SKL::smskep) p3 = nullptr;
+
+ // Not ODR-uses; ok. The operand of noexcept is unevaluated.
+ (void)noexcept(skep());
+ (void)noexcept(SKL{}.mskep());
+ (void)noexcept(SKL::smskep());
+
+ // Not ODR-uses; ok. These typeid operands are pointer prvalues, not polymorphic glvalues, so they are unevaluated.
+ (void)typeid(&skep);
+ (void)typeid(&SKL::mskep);
+ (void)typeid(&SKL::smskep);
+
+ // device-error@+1 2 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ skep(); // ODR-use: evaluated call to a free function
+ // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL{}.mskep(); // ODR-use: evaluated call to a non-static member function
+ // device-error@+1 2 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL::smskep(); // ODR-use: evaluated call to a static member function
+
+ // device-error@+1 2 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ (void)&skep; // ODR-use: address taken in an evaluated expression
+ // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ (void)&SKL::mskep; // ODR-use: pointer to member formed in an evaluated expression
+ // device-error@+1 2 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ (void)&SKL::smskep; // ODR-use: address taken in an evaluated expression
+
+ SKL sklo;
+ // device-error@+1 2 {{function 'operator()' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ sklo(); // ODR-use: invokes SKL::operator()() const
+}
+
+// device-note@+1 5 {{attribute is here}}
+[[clang::sycl_kernel_entry_point(KN<1>)]]
+void skep() {
+ // device-note@+1 {{called by 'skep'}}
+ df();
+ // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ skep();
+ // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL{}.mskep();
+ // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL::smskep();
+}
+
+// device-note@+1 7 {{attribute is here}}
+[[clang::sycl_kernel_entry_point(KN<2>)]]
+void SKL::mskep() {
+ df();
+ // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ skep();
+ // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL{}.mskep();
+ // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL::smskep();
+}
+
+// device-note@+1 3 {{attribute is here}}
+[[clang::sycl_kernel_entry_point(KN<3>)]]
+void SKL::smskep() {
+ df();
+ // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ skep();
+ // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL{}.mskep();
+ // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL::smskep();
+}
+
+[[clang::sycl_kernel_entry_point(KN<4>)]]
+void SKL::operator()() const {
+ df();
+ // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ skep();
+ // device-error@+1 2 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL{}.mskep();
+ // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL::smskep();
+}
+
+[[clang::sycl_external]]
+void sedf() {
+ // device-note@+1 {{called by 'sedf'}}
+ df();
+ // device-error@+1 {{function 'skep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ skep();
+ // device-error@+1 {{function 'mskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL{}.mskep();
+ // device-error@+1 {{function 'smskep' cannot be used in device code because it is declared with the 'clang::sycl_kernel_entry_point' attribute}}
+ SKL::smskep();
+}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
index 8f81fa218c17..b1c9e270a02b 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
@@ -1,4 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
// These tests validate parsing of the sycl_kernel_entry_point argument list
@@ -8,6 +10,9 @@
template<int> struct ST; // #ST-decl
template<int N> using TTA = ST<N>; // #TTA-decl
+// A generic kernel launch function.
+template<typename KN, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
////////////////////////////////////////////////////////////////////////////////
// Valid declarations.
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
index 8788e147a2ae..05a660e91e82 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
@@ -17,6 +17,10 @@ module M2 { header "m2.h" }
#--- common.h
template<int> struct KN;
+// A generic kernel launch function.
+template<typename KN, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
+
[[clang::sycl_kernel_entry_point(KN<1>)]]
void common_test1() {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
index 0575a7a5a67e..dcea60e016d1 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
@@ -15,6 +15,10 @@
#--- pch.h
template<int> struct KN;
+// A generic kernel launch function.
+template<typename KN, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
+
[[clang::sycl_kernel_entry_point(KN<1>)]]
void pch_test1() {} // << expected previous declaration note here.
@@ -26,11 +30,11 @@ template void pch_test2<KN<2>>();
#--- test.cpp
// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
-// expected-note@pch.h:4 {{previous declaration is here}}
+// expected-note@pch.h:8 {{previous declaration is here}}
[[clang::sycl_kernel_entry_point(KN<1>)]]
void test1() {}
// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
-// expected-note@pch.h:8 {{previous declaration is here}}
+// expected-note@pch.h:12 {{previous declaration is here}}
[[clang::sycl_kernel_entry_point(KN<2>)]]
void test2() {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
index c7b83932fefe..2abb24cde666 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
@@ -1,4 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
// These tests validate that the kernel name type argument provided to the
@@ -7,6 +9,11 @@
// specification.
struct S1;
+
+// A generic kernel launch function.
+template<typename KernelName, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
+
// expected-warning@+3 {{redundant 'clang::sycl_kernel_entry_point' attribute}}
// expected-note@+1 {{previous attribute is here}}
[[clang::sycl_kernel_entry_point(S1),
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp
index 4c6157041962..b39a77bd3587 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp
@@ -1,4 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
// These tests are intended to validate that a sycl_kernel_entry_point attribute
@@ -8,6 +10,10 @@
// attribute during instantiation of a specialization unless that specialization
// is selected by overload resolution.
+// A generic kernel launch function.
+template<typename KernelName, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
+
// FIXME: C++23 [temp.expl.spec]p12 states:
// FIXME: ... Similarly, attributes appearing in the declaration of a template
// FIXME: have no effect on an explicit specialization of that template.
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp
new file mode 100644
index 000000000000..2112733b41fc
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp
@@ -0,0 +1,188 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++17 -fsycl-is-host -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++17 -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++20 -fsycl-is-host -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++20 -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++23 -fsycl-is-host -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++23 -fsycl-is-device -verify %s
+
+// These tests validate diagnostics for invalid use of 'this' in the body of
+// a function declared with the sycl_kernel_entry_point attribute.
+
+
+template<typename T> struct remove_reference_t {
+ using type = T;
+};
+template<typename T> struct remove_reference_t<T&> {
+ using type = T;
+};
+
+namespace std {
+struct type_info {
+ virtual ~type_info();
+};
+} // namespace std
+
+// A generic kernel launch function.
+template<typename KernelName, typename... Ts>
+void sycl_kernel_launch(const char *, Ts...) {}
+
+////////////////////////////////////////////////////////////////////////////////
+// Valid declarations.
+////////////////////////////////////////////////////////////////////////////////
+template<int, int=0> struct KN;
+
+struct S1 {
+ [[clang::sycl_kernel_entry_point(KN<1>)]] void ok1() {
+ (void)sizeof(this);
+ }
+};
+
+struct S2 {
+ [[clang::sycl_kernel_entry_point(KN<2>)]] void ok2() {
+ (void)noexcept(this);
+ }
+};
+
+struct S3 {
+ [[clang::sycl_kernel_entry_point(KN<3>)]] void ok3() {
+ decltype(this) x = nullptr;
+ }
+};
+
+struct S4 {
+ static void smf();
+ [[clang::sycl_kernel_entry_point(KN<4>)]] void ok4() {
+ remove_reference_t<decltype(*this)>::type::smf();
+ }
+};
+
+struct S5 {
+ int dm;
+ void mf();
+ [[clang::sycl_kernel_entry_point(KN<5>)]] void ok5() {
+ (void)typeid(*this); // S5 is not a polymorphic class type, so the operand (and thus 'this') is not evaluated.
+ (void)typeid(dm); // 'int' is not a polymorphic class type; implicit 'this' is not evaluated.
+ (void)typeid(mf()); // 'void' is not a polymorphic class type; implicit 'this' is not evaluated.
+ }
+};
+
+template<typename KN, bool B>
+struct S6 {
+ void mf() noexcept(B);
+ [[clang::sycl_kernel_entry_point(KN)]] void ok6() noexcept(noexcept(mf())) {}
+};
+template void S6<KN<6,0>, false>::ok6();
+template void S6<KN<6,1>, true>::ok6();
+
+template<typename KN, bool B>
+struct S7 {
+ void mf() noexcept(B);
+ [[clang::sycl_kernel_entry_point(KN)]] void ok7() noexcept(noexcept(this->mf())) {}
+};
+template void S7<KN<7,0>, false>::ok7();
+template void S7<KN<7,1>, true>::ok7();
+
+#if __cplusplus >= 202002L
+template<typename KN, typename T>
+struct S8 {
+ void mf(T);
+ [[clang::sycl_kernel_entry_point(KN)]] void ok8() requires(requires { mf(1); }) {}
+};
+template void S8<KN<8>, int>::ok8();
+
+template<typename KN, typename T>
+struct S9 {
+ void mf(T);
+ [[clang::sycl_kernel_entry_point(KN)]] void ok9() requires(requires { this->mf(1); }) {}
+};
+template void S9<KN<9>, int>::ok9();
+#endif
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Invalid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+template<int, int=0> struct BADKN;
+
+// expected-error@+3 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}}
+struct B1 {
+ [[clang::sycl_kernel_entry_point(BADKN<1>)]] void bad1() {
+ (void)this;
+ }
+};
+
+// expected-error@+4 {{'this' cannot be implicitly used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}}
+struct B2 {
+ int dm;
+ [[clang::sycl_kernel_entry_point(BADKN<2>)]] void bad2() {
+ (void)dm;
+ }
+};
+
+// expected-error@+4 {{'this' cannot be implicitly used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}}
+struct B3 {
+ void mf();
+ [[clang::sycl_kernel_entry_point(BADKN<3>)]] void bad3() {
+ (void)mf();
+ }
+};
+
+// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}}
+struct B4 {
+ virtual void vmf() = 0;
+ [[clang::sycl_kernel_entry_point(BADKN<4>)]] void bad4() {
+ (void)typeid(*this); // B4 is a polymorphic class type (it declares a virtual function), so the operand is evaluated and 'this' is used.
+ }
+};
+
+// A diagnostic is not currently issued for uninstantiated definitions. In this
+// case, a declaration is instantiated, but a definition isn't. A diagnostic
+// will be issued if a definition is instantiated (as the next test exercises).
+struct B5 {
+ template<typename KN>
+ [[clang::sycl_kernel_entry_point(KN)]] void bad5() {
+ (void)this;
+ }
+};
+extern template void B5::bad5<BADKN<5>>();
+
+// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}}
+struct B6 {
+ template<typename KN>
+ [[clang::sycl_kernel_entry_point(KN)]] void bad6() {
+ (void)this;
+ }
+};
+// expected-note@+1 {{in instantiation of function template specialization 'B6::bad6<BADKN<6>>' requested here}}
+template void B6::bad6<BADKN<6>>();
+
+// A diagnostic is not currently issued for uninstantiated definitions. In this
+// case, a declaration is instantiated, but a definition isn't. A diagnostic
+// will be issued if a definition is instantiated (as the next test exercises).
+template<typename KN>
+struct B7 {
+ [[clang::sycl_kernel_entry_point(KN)]] void bad7() {
+ (void)this;
+ }
+};
+extern template void B7<BADKN<7>>::bad7();
+
+// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}}
+template<typename KN>
+struct B8 {
+ [[clang::sycl_kernel_entry_point(KN)]] void bad8() {
+ (void)this;
+ }
+};
+// expected-note@+1 {{in instantiation of member function 'B8<BADKN<8>>::bad8' requested here}}
+template void B8<BADKN<8>>::bad8();
+
+#if __cplusplus >= 202302L
+struct B9 {
+ // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function with an explicit object parameter}}
+ [[clang::sycl_kernel_entry_point(BADKN<9>)]] void bad9(this B9 self) {
+ (void)self;
+ }
+};
+#endif
diff --git a/clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp b/clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp
new file mode 100644
index 000000000000..cd186a833b02
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-launch-ms-compat.cpp
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++20 -fsyntax-only -fsycl-is-host -fms-compatibility -fcxx-exceptions -verify=host,expected %s
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -std=c++20 -fsyntax-only -fsycl-is-device -fms-compatibility -verify=device,expected %s
+
+// Test Microsoft extensions for lookup of a sycl_kernel_launch member template
+// in a dependent base class.
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Valid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+// A unique kernel name type is required for each declared kernel entry point.
+template<int> struct KN;
+
+// A generic kernel object type.
+template<int>
+struct KT {
+ void operator()() const;
+};
+
+
+namespace ok1 {
+ template<typename Derived>
+ struct base_handler {
+ protected:
+ // expected-note@+2 {{must qualify identifier to find this declaration in dependent base class}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ };
+ template<int N>
+ struct handler : protected base_handler<handler<N>> {
+ // A warning is issued because, in standard C++, unqualified lookup for
+ // sycl_kernel_launch would not consider dependent base classes. Such
+ // lookups are allowed as a Microsoft compatible extension.
+ // expected-warning@+4 {{use of member 'sycl_kernel_launch' found via unqualified lookup into dependent bases of class templates is a Microsoft extension}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'KN<1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'KT<1>') required here}}
+ [[clang::sycl_kernel_entry_point(KN<1>)]]
+ void skep(KT<1> k) {
+ k();
+ }
+ };
+ // expected-note@+1 {{in instantiation of member function 'ok1::handler<1>::skep' requested here}}
+ template void handler<1>::skep(KT<1>);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Invalid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+// A unique kernel name type is required for each declared kernel entry point.
+template<int> struct BADKN;
+
+// A generic kernel object type.
+template<int>
+struct BADKT {
+ void operator()() const;
+};
+
+
+namespace bad1 {
+ template<typename Derived>
+ struct base_handler {
+ private:
+ // expected-note@+3 {{must qualify identifier to find this declaration in dependent base class}}
+ // expected-note@+2 {{declared private here}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ };
+ template<int N>
+ struct handler : protected base_handler<handler<N>> {
+ // In standard C++, unqualified lookup for sycl_kernel_launch would not
+ // consider dependent base classes. Such lookups are allowed as a Microsoft
+ // compatible extension, but access checks are still performed, which makes
+ // this case an error.
+ // expected-warning@+5 {{use of member 'sycl_kernel_launch' found via unqualified lookup into dependent bases of class templates is a Microsoft extension}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<1>') required here}}
+ // expected-error@+2 {{'sycl_kernel_launch' is a private member of 'bad1::base_handler<bad1::handler<1>>'}}
+ [[clang::sycl_kernel_entry_point(BADKN<1>)]]
+ void skep(BADKT<1> k) {
+ k();
+ }
+ };
+ // expected-note@+1 {{in instantiation of member function 'bad1::handler<1>::skep' requested here}}
+ template void handler<1>::skep(BADKT<1>);
+}
diff --git a/clang/test/SemaSYCL/sycl-kernel-launch.cpp b/clang/test/SemaSYCL/sycl-kernel-launch.cpp
new file mode 100644
index 000000000000..20d9becb8192
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-launch.cpp
@@ -0,0 +1,560 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s
+
+// Test overload resolution for implicit calls to sycl_kernel_launch<KN>(...)
+// synthesized for functions declared with the sycl_kernel_entry_point
+// attribute.
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Valid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+// A unique kernel name type is required for each declared kernel entry point.
+template<int, int = 0> struct KN;
+
+// A generic kernel object type.
+template<int, int = 0>
+struct KT {
+ void operator()() const;
+};
+
+
+// sycl_kernel_launch as function template at namespace scope.
+namespace ok1 {
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ [[clang::sycl_kernel_entry_point(KN<1>)]]
+ void skep(KT<1> k) {
+ k();
+ }
+}
+
+// sycl_kernel_launch as function template at namespace scope with default
+// template arguments and default function arguments.
+namespace ok2 {
+ template<typename KN, typename T = int>
+ void sycl_kernel_launch(const char *, KT<2>, T = 2);
+ [[clang::sycl_kernel_entry_point(KN<2>)]]
+ void skep(KT<2> k) {
+ k();
+ }
+}
+
+// sycl_kernel_launch as overload set.
+namespace ok3 {
+ template<typename KN>
+ void sycl_kernel_launch(const char *);
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ [[clang::sycl_kernel_entry_point(KN<3>)]]
+ void skep(KT<3> k) {
+ k();
+ }
+}
+
+// sycl_kernel_launch as static member function template.
+namespace ok4 {
+ struct handler {
+ private:
+ template<typename KN, typename... Ts>
+ static void sycl_kernel_launch(const char *, Ts...);
+ public:
+ [[clang::sycl_kernel_entry_point(KN<4,0>)]]
+ static void skep(KT<4,0> k) {
+ k();
+ }
+ [[clang::sycl_kernel_entry_point(KN<4,1>)]]
+ void skep(KT<4,1> k) {
+ k();
+ }
+ };
+}
+
+// sycl_kernel_launch as non-static member function template.
+namespace ok5 {
+ struct handler {
+ private:
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ public:
+ [[clang::sycl_kernel_entry_point(KN<5>)]]
+ void skep(KT<5> k) {
+ k();
+ }
+ };
+}
+
+#if __cplusplus >= 202302L
+// sycl_kernel_launch as non-static member function template with explicit
+// object parameter.
+namespace ok6 {
+ struct handler {
+ private:
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(this handler self, const char *, Ts...);
+ public:
+ [[clang::sycl_kernel_entry_point(KN<6>)]]
+ void skep(KT<6> k) {
+ k();
+ }
+ };
+}
+#endif
+
+// sycl_kernel_launch as variable template.
+namespace ok7 {
+ template<typename KN>
+ struct launcher {
+ template<typename... Ts>
+ void operator()(const char *, Ts...);
+ };
+ template<typename KN>
+ launcher<KN> sycl_kernel_launch;
+ [[clang::sycl_kernel_entry_point(KN<7>)]]
+ void skep(KT<7> k) {
+ k();
+ }
+}
+
+#if __cplusplus >= 202302L
+// sycl_kernel_launch as variable template with static call operator template.
+namespace ok8 {
+ template<typename KN>
+ struct launcher {
+ template<typename... Ts>
+ static void operator()(const char *, Ts...);
+ };
+ template<typename KN>
+ launcher<KN> sycl_kernel_launch;
+ [[clang::sycl_kernel_entry_point(KN<8>)]]
+ void skep(KT<8> k) {
+ k();
+ }
+}
+#endif
+
+#if __cplusplus >= 202302L
+// sycl_kernel_launch as variable template with call operator template with
+// explicit object parameter.
+namespace ok9 {
+ template<typename KN>
+ struct launcher {
+ template<typename... Ts>
+ void operator()(this launcher self, const char *, Ts...);
+ };
+ template<typename KN>
+ launcher<KN> sycl_kernel_launch;
+ [[clang::sycl_kernel_entry_point(KN<9>)]]
+ void skep(KT<9> k) {
+ k();
+ }
+}
+#endif
+
+// sycl_kernel_launch as base class non-static member function template.
+namespace ok10 {
+ template<typename Derived>
+ struct base_handler {
+ protected:
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ };
+ struct handler : protected base_handler<handler> {
+ public:
+ [[clang::sycl_kernel_entry_point(KN<10>)]]
+ void skep(KT<10> k) {
+ k();
+ }
+ };
+}
+
+// sycl_kernel_launch with non-reference parameters.
+namespace ok11 {
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ struct move_only {
+ move_only(move_only&&) = default;
+ };
+ [[clang::sycl_kernel_entry_point(KN<11>)]]
+ void skep(KT<11> k, move_only) {
+ k();
+ }
+}
+
+// sycl_kernel_launch with forwarding reference parameters.
+namespace ok12 {
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts &&...);
+ struct non_copyable {
+ non_copyable(const non_copyable&) = delete;
+ };
+ struct non_moveable {
+ non_moveable(non_moveable&&) = delete;
+ };
+ struct move_only {
+ move_only(move_only&&) = default;
+ };
+ [[clang::sycl_kernel_entry_point(KN<12>)]]
+ void skep(KT<12> k, non_copyable, non_moveable, move_only) {
+ k();
+ }
+}
+
+// ADL for sycl_kernel_launch.
+namespace ok13 {
+ template<typename KN, typename KT, typename T>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k, T t) {
+ k();
+ }
+ namespace nested {
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ struct S13 {};
+ }
+ template void skep<KN<13>>(KT<13>, nested::S13);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Invalid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+// A unique kernel name type is required for each declared kernel entry point.
+template<int, int = 0> struct BADKN;
+
+// A generic kernel object type.
+template<int, int = 0>
+struct BADKT {
+ void operator()() const;
+};
+
+
+// Undeclared sycl_kernel_launch identifier from non-template function.
+namespace bad1 {
+ // expected-error@+4 {{use of undeclared identifier 'sycl_kernel_launch'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<1>') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<1>)]]
+ void skep(BADKT<1> k) {
+ k();
+ }
+}
+
+// Undeclared sycl_kernel_launch identifier from function template.
+namespace bad2 {
+ // expected-error@+5 {{use of undeclared identifier 'sycl_kernel_launch'}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<2>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<2>') required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k) {
+ k();
+ }
+ // expected-note@+1 {{in instantiation of function template specialization 'bad2::skep<BADKN<2>, BADKT<2>>' requested here}}
+ template void skep<BADKN<2>>(BADKT<2>);
+}
+
+// sycl_kernel_launch does not refer to a template; only a non-template function is declared.
+namespace bad3 {
+ // expected-note@+1 {{declared as a non-template here}}
+ void sycl_kernel_launch(const char *, BADKT<3>);
+ // expected-error@+4 {{'sycl_kernel_launch' does not refer to a template}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<3>' required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<3>)]]
+ void skep(BADKT<3> k) {
+ k();
+ }
+}
+
+// No matching function for call to sycl_kernel_launch; not enough arguments.
+namespace bad4 {
+ // expected-note@+2 {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+ template<typename KN, typename KT>
+ void sycl_kernel_launch(const char *, KT);
+ // expected-error@+5 {{no matching function for call to 'sycl_kernel_launch'}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<4>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]') required here}}
+ template<typename KN>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep() {}
+ // expected-note@+1 {{in instantiation of function template specialization 'bad4::skep<BADKN<4>>' requested here}}
+ template void skep<BADKN<4>>();
+}
+
+// No matching function for call to sycl_kernel_launch; too many arguments.
+namespace bad5 {
+ // expected-note@+2 {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+ template<typename KN, typename KT>
+ void sycl_kernel_launch(const char *, KT);
+ // expected-error@+5 {{no matching function for call to 'sycl_kernel_launch'}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<5>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<5>', xvalue of type 'int') required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k, int i) {
+ k();
+ }
+ // expected-note@+1 {{in instantiation of function template specialization 'bad5::skep<BADKN<5>, BADKT<5>>' requested here}}
+ template void skep<BADKN<5>>(BADKT<5>, int);
+}
+
+// No matching function for call to sycl_kernel_launch; mismatched function parameter type.
+namespace bad6 {
+ // expected-note-re@+2 {{candidate function template not viable: no known conversion from 'const char[{{[0-9]*}}]' to 'int' for 1st argument}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(int, Ts...);
+ // expected-error@+5 {{no matching function for call to 'sycl_kernel_launch'}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<6>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<6>') required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k) {
+ k();
+ }
+ // expected-note@+1 {{in instantiation of function template specialization 'bad6::skep<BADKN<6>, BADKT<6>>' requested here}}
+ template void skep<BADKN<6>>(BADKT<6>);
+}
+
+// No matching function for call to sycl_kernel_launch; mismatched template parameter kind.
+namespace bad7 {
+ // expected-note@+2 {{candidate template ignored: invalid explicitly-specified argument for 1st template parameter}}
+ template<int, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ // expected-error@+4 {{no matching function for call to 'sycl_kernel_launch'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<7>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<7>') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<7>)]]
+ void skep(BADKT<7> k) {
+ k();
+ }
+}
+
+// No matching function for call to sycl_kernel_launch; substitution failure.
+namespace bad8 {
+ // expected-note@+2 {{candidate template ignored: substitution failure [with KN = BADKN<8>, KT = BADKT<8>]: no type named 'no_such_type' in 'BADKT<8>'}}
+ template<typename KN, typename KT, typename T = typename KT::no_such_type>
+ void sycl_kernel_launch(const char *, KT);
+ // expected-error@+4 {{no matching function for call to 'sycl_kernel_launch'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<8>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<8>') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<8>)]]
+ void skep(BADKT<8> k) {
+ k();
+ }
+}
+
+// No matching function for call to sycl_kernel_launch; deduction failure.
+namespace bad9 {
+ // expected-note@+2 {{candidate template ignored: couldn't infer template argument 'T'}}
+ template<typename KN, typename KT, typename T>
+ void sycl_kernel_launch(const char *, KT);
+ // expected-error@+4 {{no matching function for call to 'sycl_kernel_launch'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<9>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<9>') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<9>)]]
+ void skep(BADKT<9> k) {
+ k();
+ }
+}
+
+// No matching function for call to sycl_kernel_launch object; mismatched function parameter type.
+namespace bad10 {
+ template<typename KN>
+ struct launcher {
+ // expected-note-re@+2 {{candidate function template not viable: no known conversion from 'const char[{{[0-9]*}}]' to 'int' for 1st argument}}
+ template<typename... Ts>
+ void operator()(int, Ts...);
+ };
+ template<typename KN>
+ launcher<KN> sycl_kernel_launch;
+ // expected-error@+5 {{no matching function for call to object of type 'launcher<BADKN<10, 0>>'}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<10>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<10>') required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k) {
+ k();
+ }
+ // expected-note@+1 {{in instantiation of function template specialization 'bad10::skep<BADKN<10>, BADKT<10>>' requested here}}
+ template void skep<BADKN<10>>(BADKT<10>);
+}
+
+// Invalid template argument for sycl_kernel_launch variable template; mismatched template parameter kind.
+namespace bad11 {
+ template<int KN>
+ struct launcher {
+ template<typename... Ts>
+ void operator()(int, Ts...);
+ };
+ // expected-note@+1 {{template parameter is declared here}}
+ template<int KN>
+ launcher<KN> sycl_kernel_launch;
+ // expected-error@+5 {{template argument for non-type template parameter must be an expression}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'KN' required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k) {
+ k();
+ }
+ template void skep<BADKN<11>>(BADKT<11>);
+}
+
+// sycl_kernel_launch as variable template with private call operator template.
+namespace bad12 {
+ template<typename KN>
+ struct launcher {
+ private:
+ // expected-note@+2 {{declared private here}}
+ template<typename... Ts>
+ void operator()(const char *, Ts...);
+ };
+ template<typename KN>
+ launcher<KN> sycl_kernel_launch;
+ // expected-error@+4 {{'operator()' is a private member of 'bad12::launcher<BADKN<12>>'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<12>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<12>') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<12>)]]
+ void skep(BADKT<12> k) {
+ k();
+ }
+}
+
+// Ambiguous reference to sycl_kernel_launch.
+namespace bad13 {
+ inline namespace in1 {
+ // expected-note@+2 {{candidate found by name lookup is 'bad13::in1::sycl_kernel_launch'}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ }
+ inline namespace in2 {
+ template<typename KN>
+ struct launcher {
+ template<typename KT, typename... Ts>
+ void operator()(const char *, Ts...);
+ };
+ // expected-note@+2 {{candidate found by name lookup is 'bad13::in2::sycl_kernel_launch'}}
+ template<typename KN>
+ launcher<KN> sycl_kernel_launch;
+ }
+ // expected-error@+5 {{reference to 'sycl_kernel_launch' is ambiguous}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'KN' required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k) {
+ k();
+ }
+ template void skep<BADKN<13>>(BADKT<13>);
+}
+
+// Ambiguous call to sycl_kernel_launch.
+namespace bad14 {
+ // expected-note@+2 {{candidate function [with KN = BADKN<14>, KT = BADKT<14>]}}
+ template<typename KN, typename KT>
+ void sycl_kernel_launch(const char *, KT, signed char);
+ // expected-note@+2 {{candidate function [with KN = BADKN<14>, KT = BADKT<14>]}}
+ template<typename KN, typename KT>
+ void sycl_kernel_launch(const char *, KT, unsigned char);
+ // expected-error@+4 {{call to 'sycl_kernel_launch' is ambiguous}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<14>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<14>', xvalue of type 'int') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<14>)]]
+ void skep(BADKT<14> k, int i) {
+ k();
+ }
+}
+
+// Call to non-static member sycl_kernel_launch from a static member function.
+namespace bad15 {
+ struct S {
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ // expected-error@+4 {{call to non-static member function without an object argument}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<15>' required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<15>)]]
+ static void skep(BADKT<15> k) {
+ k();
+ }
+ };
+}
+
+// sycl_kernel_launch as dependent base class non-static member function
+// template.
+namespace bad16 {
+ template<typename Derived>
+ struct base_handler {
+ protected:
+ // expected-note@+2 {{member is declared here}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ };
+ template<int N>
+ struct handler : protected base_handler<handler<N>> {
+ // Lookup for sycl_kernel_launch fails because lookup in dependent base
+ // classes requires explicit qualification.
+ // expected-error@+4 {{explicit qualification required to use member 'sycl_kernel_launch' from dependent base class}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<16>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<16>') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<16>)]]
+ void skep(BADKT<16> k) {
+ k();
+ }
+ };
+ // expected-note@+1 {{in instantiation of member function 'bad16::handler<16>::skep' requested here}}
+ template void handler<16>::skep(BADKT<16>);
+}
+
+// sycl_kernel_launch with non-reference parameters and non-moveable arguments.
+namespace bad17 {
+ // expected-note@+2 2 {{passing argument to parameter here}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(const char *, Ts...);
+ struct non_copyable {
+ // expected-note@+1 {{'non_copyable' has been explicitly marked deleted here}}
+ non_copyable(const non_copyable&) = delete;
+ };
+ // expected-error@+4 {{call to deleted constructor of 'bad17::non_copyable'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<17, 0>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<17, 0>', xvalue of type 'non_copyable') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<17,0>)]]
+ void skep(BADKT<17,0> k, non_copyable) {
+ k();
+ }
+ struct non_moveable {
+ // expected-note@+1 {{'non_moveable' has been explicitly marked deleted here}}
+ non_moveable(non_moveable&&) = delete;
+ };
+ // expected-error@+4 {{call to deleted constructor of 'bad17::non_moveable'}}
+ // expected-note@+2 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+1 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<17, 1>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<17, 1>', xvalue of type 'non_moveable') required here}}
+ [[clang::sycl_kernel_entry_point(BADKN<17,1>)]]
+ void skep(BADKT<17,1> k, non_moveable) {
+ k();
+ }
+}
+
+// sycl_kernel_launch declared after use and not found by ADL.
+namespace bad18 {
+ // expected-error@+5 {{call to function 'sycl_kernel_launch' that is neither visible in the template definition nor found by argument-dependent lookup}}
+ // expected-note@+3 {{this indicates a problem with the SYCL runtime header files; please consider reporting this to your SYCL runtime provider}}
+ // expected-note-re@+2 {{in implicit call to 'sycl_kernel_launch' with template argument 'BADKN<18>' and function arguments (lvalue of type 'const char[{{[0-9]*}}]', xvalue of type 'BADKT<18>') required here}}
+ template<typename KN, typename KT>
+ [[clang::sycl_kernel_entry_point(KN)]]
+ void skep(KT k) {
+ k();
+ }
+ // expected-note@+2 {{'sycl_kernel_launch' should be declared prior to the call site or in the global namespace}}
+ template<typename KN, typename... Ts>
+ void sycl_kernel_launch(Ts...) {}
+ // expected-note@+1 {{in instantiation of function template specialization 'bad18::skep<BADKN<18>, BADKT<18>>' requested here}}
+ template void skep<BADKN<18>>(BADKT<18>);
+}
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 17f485e5c78a..d31d2c0c9bb6 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -383,6 +383,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
break;
case Stmt::SYCLKernelCallStmtClass:
+ case Stmt::UnresolvedSYCLKernelCallStmtClass:
K = CXCursor_UnexposedStmt;
break;
diff --git a/flang-rt/lib/runtime/execute.cpp b/flang-rt/lib/runtime/execute.cpp
index 8da7069f5700..b843a0c7f463 100644
--- a/flang-rt/lib/runtime/execute.cpp
+++ b/flang-rt/lib/runtime/execute.cpp
@@ -111,7 +111,17 @@ std::int64_t TerminationCheck(std::int64_t status, const Descriptor *cmdstat,
// On WIN32 API std::system() returns exit status directly. On other OS'es,
// special status codes are handled below.
std::int64_t exitStatusVal{status};
-#ifndef _WIN32
+#ifdef _WIN32
+ if (status == 9009) {
+ // cmd.exe returns status code 9009 for a "command not found" error
+ if (!cmdstat) {
+ terminator.Crash("Command not found.");
+ } else {
+ StoreIntToDescriptor(cmdstat, COMMAND_NOT_FOUND_ERR, terminator);
+ CheckAndCopyCharsToDescriptor(cmdmsg, "Command not found.");
+ }
+ }
+#else
#if defined(WIFSIGNALED) && defined(WTERMSIG)
if (WIFSIGNALED(status)) {
@@ -195,9 +205,36 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait,
RUNTIME_CHECK(terminator, IsValidCharDescriptor(cmdmsg));
}
+ const char *cmd{newCmd};
+#ifdef _WIN32
+ // Construct a string that looks like
+ // "cmd.exe /v:on /c \"mycommand & exit /b !ERRORLEVEL!\""
+ // Explanation:
+ // /v:on - turns delayed environment variable expansion on, so
+ // variables written as !VAR! are expanded at execution time
+ // instead of at parse time. This is required for !ERRORLEVEL!
+ // to reflect the current error code at the moment exit runs.
+ // exit /b !ERRORLEVEL! - exits the current cmd instance (/b) and
+ // sets its process exit code to the current ERRORLEVEL value.
+ // Because delayed expansion is on, !ERRORLEVEL! is evaluated at
+ // execution time, so this cmd instance returns the same error
+ // code as mycommand.
+ // This allows cmd.exe to either return the exit code of mycommand, or
+ // to return its own exit code to the caller. The code 9009 is used
+ // by cmd.exe to indicate a "not found" condition.
+ const char prefix[]{"cmd.exe /v:on /c \""};
+ const char suffix[]{" & exit /b !ERRORLEVEL!\""};
+ const size_t newCmdWinLen{
+ (sizeof(prefix) - 1) + std::strlen(newCmd) + (sizeof(suffix) - 1) + 1};
+ char *newCmdWin{
+ static_cast<char *>(AllocateMemoryOrCrash(terminator, newCmdWinLen))};
+ std::snprintf(newCmdWin, newCmdWinLen, "%s%s%s", prefix, newCmd, suffix);
+ cmd = newCmdWin;
+#endif
+
if (wait) {
// either wait is not specified or wait is true: synchronous mode
- std::int64_t status{std::system(newCmd)};
+ std::int64_t status{std::system(cmd)};
std::int64_t exitStatusVal{
TerminationCheck(status, cmdstat, cmdmsg, terminator)};
// If sync, assigned processor-dependent exit status. Otherwise unchanged
@@ -211,13 +248,6 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait,
si.cb = sizeof(si);
ZeroMemory(&pi, sizeof(pi));
- // add "cmd.exe /c " to the beginning of command
- const char *prefix{"cmd.exe /c "};
- char *newCmdWin{static_cast<char *>(AllocateMemoryOrCrash(
- terminator, std::strlen(prefix) + std::strlen(newCmd) + 1))};
- std::strcpy(newCmdWin, prefix);
- std::strcat(newCmdWin, newCmd);
-
// Convert the char to wide char
const size_t sizeNeeded{mbstowcs(NULL, newCmdWin, 0) + 1};
wchar_t *wcmd{static_cast<wchar_t *>(
@@ -225,7 +255,6 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait,
if (std::mbstowcs(wcmd, newCmdWin, sizeNeeded) == static_cast<size_t>(-1)) {
terminator.Crash("Char to wide char failed for newCmd");
}
- FreeMemory(newCmdWin);
if (CreateProcessW(nullptr, wcmd, nullptr, nullptr, FALSE, 0, nullptr,
nullptr, &si, &pi)) {
@@ -278,6 +307,11 @@ void RTNAME(ExecuteCommandLine)(const Descriptor &command, bool wait,
}
#endif
}
+
+#ifdef _WIN32
+ FreeMemory(newCmdWin);
+#endif
+
// Deallocate memory if EnsureNullTerminated dynamically allocated memory
if (newCmd != command.OffsetElement()) {
FreeMemory(newCmd);
diff --git a/flang-rt/unittests/Runtime/CommandTest.cpp b/flang-rt/unittests/Runtime/CommandTest.cpp
index 4509c9a34c79..3bed8acafc59 100644
--- a/flang-rt/unittests/Runtime/CommandTest.cpp
+++ b/flang-rt/unittests/Runtime/CommandTest.cpp
@@ -365,9 +365,9 @@ TEST_F(ZeroArguments, ECLNotExecutedCommandErrorSync) {
RTNAME(ExecuteCommandLine)
(*command.get(), wait, exitStat.get(), cmdStat.get(), cmdMsg.get());
#ifdef _WIN32
- CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 1);
- CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 0);
- CheckDescriptorEqStr(cmdMsg.get(), "cmd msg buffer XXXXXXXX");
+ CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 9009);
+ CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 5);
+ CheckDescriptorEqStr(cmdMsg.get(), "Command not found.");
#else
CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 126);
CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 4);
@@ -394,9 +394,9 @@ TEST_F(ZeroArguments, ECLNotFoundCommandErrorSync) {
RTNAME(ExecuteCommandLine)
(*command.get(), wait, exitStat.get(), cmdStat.get(), cmdMsg.get());
#ifdef _WIN32
- CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 1);
- CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 0);
- CheckDescriptorEqStr(cmdMsg.get(), "unmodified buffer XXXXXXXXX");
+ CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 9009);
+ CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 5);
+ CheckDescriptorEqStr(cmdMsg.get(), "Command not found.");
#else
CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 127);
CheckDescriptorEqInt<std::int64_t>(cmdStat.get(), 5);
@@ -412,7 +412,7 @@ TEST_F(ZeroArguments, ECLInvalidCommandTerminatedSync) {
#ifdef _WIN32
EXPECT_DEATH(RTNAME(ExecuteCommandLine)(
*command.get(), wait, nullptr, nullptr, cmdMsg.get()),
- "Invalid command quit with exit status code: 1");
+ "Command not found.");
#else
EXPECT_DEATH(RTNAME(ExecuteCommandLine)(
*command.get(), wait, nullptr, nullptr, cmdMsg.get()),
@@ -490,7 +490,7 @@ TEST_F(ZeroArguments, SystemInvalidCommandExitStat) {
RTNAME(ExecuteCommandLine)
(*command.get(), wait, exitStat.get(), cmdStat.get(), nullptr);
#ifdef _WIN32
- CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 1);
+ CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 9009);
#else
CheckDescriptorEqInt<std::int64_t>(exitStat.get(), 127);
#endif
diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md
index 028e3ea80623..a997980ca18e 100644
--- a/flang/docs/Extensions.md
+++ b/flang/docs/Extensions.md
@@ -484,6 +484,18 @@ end
* A pointer component that has no default initialization or explicit value
in a structure constructor is defaulted to `NULL()`.
* An assumed-rank entity is an acceptable `NAMELIST` group item.
+* A named constant (`PARAMETER`) may appear as a `namelist-group-object` in a
+ `NAMELIST` statement. The Fortran standard requires namelist group objects
+ to be variables, but this usage is accepted by Flang as an extension.
+ When `-pedantic` is enabled, Flang emits a warning for this case.
+ For example:
+```
+program p
+ implicit none
+ integer, parameter :: k = 3
+ namelist /g/ k
+end program
+```
### Extensions supported when enabled by options
diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index b8fb6078d59a..f23e52585d56 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -226,6 +226,9 @@ const T *GetFirstArgument(const OmpDirectiveSpecification &spec) {
return nullptr;
}
+const OmpClause *FindClause(
+ const OmpDirectiveSpecification &spec, llvm::omp::Clause clauseId);
+
const BlockConstruct *GetFortranBlockConstruct(
const ExecutionPartConstruct &epc);
const Block &GetInnermostExecPart(const Block &block);
diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h
index e5cf915e9f78..cbcb3592f04c 100644
--- a/flang/include/flang/Support/Fortran-features.h
+++ b/flang/include/flang/Support/Fortran-features.h
@@ -82,7 +82,7 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
HostAssociatedIntentOutInSpecExpr, NonVolatilePointerToVolatile,
RealConstantWidening, VolatileOrAsynchronousTemporary, UnusedVariable,
UsedUndefinedVariable, BadValueInDeadCode, AssumedTypeSizeDummy,
- MisplacedIgnoreTKR)
+ MisplacedIgnoreTKR, NamelistParameter)
using LanguageFeatures = EnumSet<LanguageFeature, LanguageFeature_enumSize>;
using UsageWarnings = EnumSet<UsageWarning, UsageWarning_enumSize>;
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index e9ba5f386803..394c7a485525 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -786,13 +786,9 @@ static void processTileSizesFromOpenMPConstruct(
innerConstruct->BeginDir();
if (innerBeginSpec.DirId() == llvm::omp::Directive::OMPD_tile) {
// Get the size values from parse tree and convert to a vector.
- for (const auto &clause : innerBeginSpec.Clauses().v) {
- if (const auto tclause{
- std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
- processFun(tclause);
- break;
- }
- }
+ if (auto *clause = parser::omp::FindClause(
+ innerBeginSpec, llvm::omp::Clause::OMPC_sizes))
+ processFun(&std::get<parser::OmpClause::Sizes>(clause->u));
}
}
}
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index d7fadbc84ff1..a994f30a6dd7 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -77,6 +77,8 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
*ctx);
fir::FieldIndexOp::attachInterface<
OutlineRematerializationModel<fir::FieldIndexOp>>(*ctx);
+ fir::ConvertOp::attachInterface<
+ OutlineRematerializationModel<fir::ConvertOp>>(*ctx);
});
// Register HLFIR operation interfaces
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index c81f48f6323d..6d4326af7834 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -145,6 +145,16 @@ const OmpObjectList *GetOmpObjectList(const OmpDependClause::TaskDep &x) {
return &std::get<OmpObjectList>(x.t);
}
+const OmpClause *FindClause(
+ const OmpDirectiveSpecification &spec, llvm::omp::Clause clauseId) {
+ for (auto &clause : spec.Clauses().v) {
+ if (clause.Id() == clauseId) {
+ return &clause;
+ }
+ }
+ return nullptr;
+}
+
const BlockConstruct *GetFortranBlockConstruct(
const ExecutionPartConstruct &epc) {
// ExecutionPartConstruct -> ExecutableConstruct
diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp
index afe28182f862..5bdfa47bea1c 100644
--- a/flang/lib/Parser/parse-tree.cpp
+++ b/flang/lib/Parser/parse-tree.cpp
@@ -347,16 +347,12 @@ llvm::omp::Clause OpenMPAtomicConstruct::GetKind() const {
bool OpenMPAtomicConstruct::IsCapture() const {
const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)};
- return llvm::any_of(dirSpec.Clauses().v, [](auto &clause) {
- return clause.Id() == llvm::omp::Clause::OMPC_capture;
- });
+ return omp::FindClause(dirSpec, llvm::omp::Clause::OMPC_capture);
}
bool OpenMPAtomicConstruct::IsCompare() const {
const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)};
- return llvm::any_of(dirSpec.Clauses().v, [](auto &clause) {
- return clause.Id() == llvm::omp::Clause::OMPC_compare;
- });
+ return omp::FindClause(dirSpec, llvm::omp::Clause::OMPC_compare);
}
} // namespace Fortran::parser
diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp
index 0a41484399b5..732531b1bdfc 100644
--- a/flang/lib/Semantics/check-acc-structure.cpp
+++ b/flang/lib/Semantics/check-acc-structure.cpp
@@ -688,13 +688,7 @@ void AccStructureChecker::Enter(const parser::OpenACCCacheConstruct &x) {
if (const auto *triplet =
std::get_if<parser::SubscriptTriplet>(
&subscript.u)) {
- const auto &lower{std::get<0>(triplet->t)};
- const auto &upper{std::get<1>(triplet->t)};
const auto &stride{std::get<2>(triplet->t)};
- if (!lower && !upper) {
- context_.Say(designator.source,
- "The CACHE directive requires at least one of the bounds in the array section subscript triplet to be specified"_err_en_US);
- }
if (stride) {
if (auto strideVal{GetIntValue(*stride)}) {
if (*strideVal != 1) {
diff --git a/flang/lib/Semantics/check-namelist.cpp b/flang/lib/Semantics/check-namelist.cpp
index c2804c5d874e..eedc1a66b563 100644
--- a/flang/lib/Semantics/check-namelist.cpp
+++ b/flang/lib/Semantics/check-namelist.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "check-namelist.h"
+#include "flang/Semantics/tools.h"
namespace Fortran::semantics {
@@ -28,6 +29,13 @@ void NamelistChecker::Leave(const parser::NamelistStmt &nmlStmt) {
"PUBLIC namelist"_err_en_US,
nmlObjSymbol->name());
}
+ // The standard requires a `namelist-group-object` to be a variable; a named constant is accepted as an extension with a warning.
+ if (IsNamedConstant(*nmlObjSymbol)) {
+ context_.Warn(common::UsageWarning::NamelistParameter,
+ nmlObjName.source,
+ "A namelist group object '%s' should not be a PARAMETER"_port_en_US,
+ nmlObjSymbol->name());
+ }
}
}
}
diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp
index 0cad16dc3deb..f81bde981594 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -159,12 +159,11 @@ void OmpStructureChecker::HasInvalidLoopBinding(
const parser::OmpDirectiveName &beginName{beginSpec.DirName()};
auto teamsBindingChecker = [&](parser::MessageFixedText msg) {
- for (const auto &clause : beginSpec.Clauses().v) {
- if (const auto *bindClause{
- std::get_if<parser::OmpClause::Bind>(&clause.u)}) {
- if (bindClause->v.v != parser::OmpBindClause::Binding::Teams) {
- context_.Say(beginName.source, msg);
- }
+ if (auto *clause{
+ parser::omp::FindClause(beginSpec, llvm::omp::Clause::OMPC_bind)}) {
+ auto &bind{std::get<parser::OmpClause::Bind>(clause->u).v};
+ if (bind.v != parser::OmpBindClause::Binding::Teams) {
+ context_.Say(beginName.source, msg);
}
}
};
@@ -204,11 +203,9 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) {
[&](const parser::OmpBlockConstruct &c) {
const parser::OmpDirectiveSpecification &beginSpec{c.BeginDir()};
if (beginSpec.DirId() == llvm::omp::Directive::OMPD_ordered) {
- for (const auto &clause : beginSpec.Clauses().v) {
- if (std::get_if<parser::OmpClause::Simd>(&clause.u)) {
- eligibleSIMD = true;
- break;
- }
+ if (parser::omp::FindClause(
+ beginSpec, llvm::omp::Clause::OMPC_simd)) {
+ eligibleSIMD = true;
}
}
},
@@ -217,11 +214,9 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) {
&c.u)}) {
llvm::omp::Directive dirId{ssc->v.DirId()};
if (dirId == llvm::omp::Directive::OMPD_ordered) {
- for (const parser::OmpClause &x : ssc->v.Clauses().v) {
- if (x.Id() == llvm::omp::Clause::OMPC_simd) {
- eligibleSIMD = true;
- break;
- }
+ if (parser::omp::FindClause(
+ ssc->v, llvm::omp::Clause::OMPC_simd)) {
+ eligibleSIMD = true;
}
} else if (dirId == llvm::omp::Directive::OMPD_scan) {
eligibleSIMD = true;
@@ -274,9 +269,8 @@ static bool IsFullUnroll(const parser::OpenMPLoopConstruct &x) {
const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()};
if (beginSpec.DirName().v == llvm::omp::Directive::OMPD_unroll) {
- return llvm::none_of(beginSpec.Clauses().v, [](const parser::OmpClause &c) {
- return c.Id() == llvm::omp::Clause::OMPC_partial;
- });
+ return parser::omp::FindClause(
+ beginSpec, llvm::omp::Clause::OMPC_partial) == nullptr;
}
return false;
}
@@ -312,15 +306,13 @@ static std::optional<size_t> CountGeneratedNests(
if (!nestedCount || *nestedCount == 0) {
return std::nullopt;
}
- auto rangeAt{
- llvm::find_if(beginSpec.Clauses().v, [](const parser::OmpClause &c) {
- return c.Id() == llvm::omp::Clause::OMPC_looprange;
- })};
- if (rangeAt == beginSpec.Clauses().v.end()) {
+ auto *clause{
+ parser::omp::FindClause(beginSpec, llvm::omp::Clause::OMPC_looprange)};
+ if (!clause) {
return 1;
}
- auto *loopRange{parser::Unwrap<parser::OmpLooprangeClause>(*rangeAt)};
+ auto *loopRange{parser::Unwrap<parser::OmpLooprangeClause>(*clause)};
std::optional<int64_t> count{GetIntValue(std::get<1>(loopRange->t))};
if (!count || *count <= 0) {
return std::nullopt;
@@ -617,23 +609,22 @@ void OmpStructureChecker::CheckDistLinear(
void OmpStructureChecker::CheckLooprangeBounds(
const parser::OpenMPLoopConstruct &x) {
- for (const parser::OmpClause &clause : x.BeginDir().Clauses().v) {
- if (auto *lrClause{parser::Unwrap<parser::OmpLooprangeClause>(clause)}) {
- auto first{GetIntValue(std::get<0>(lrClause->t))};
- auto count{GetIntValue(std::get<1>(lrClause->t))};
- if (!first || !count || *first <= 0 || *count <= 0) {
- return;
- }
- auto requiredCount{static_cast<size_t>(*first + *count - 1)};
- if (auto loopCount{CountGeneratedNests(std::get<parser::Block>(x.t))}) {
- if (*loopCount < requiredCount) {
- context_.Say(clause.source,
- "The specified loop range requires %zu loops, but the loop sequence has a length of %zu"_err_en_US,
- requiredCount, *loopCount);
- }
- }
+ if (auto *clause{parser::omp::FindClause(
+ x.BeginDir(), llvm::omp::Clause::OMPC_looprange)}) {
+ auto *lrClause{parser::Unwrap<parser::OmpLooprangeClause>(clause)};
+ auto first{GetIntValue(std::get<0>(lrClause->t))};
+ auto count{GetIntValue(std::get<1>(lrClause->t))};
+ if (!first || !count || *first <= 0 || *count <= 0) {
return;
}
+ auto requiredCount{static_cast<size_t>(*first + *count - 1)};
+ if (auto loopCount{CountGeneratedNests(std::get<parser::Block>(x.t))}) {
+ if (*loopCount < requiredCount) {
+ context_.Say(clause->source,
+ "The specified loop range requires %zu loops, but the loop sequence has a length of %zu"_err_en_US,
+ requiredCount, *loopCount);
+ }
+ }
}
}
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 7ffda12c1fb8..431c41f443f7 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -1800,16 +1800,8 @@ void OmpStructureChecker::CheckIndividualAllocateDirective(
return true;
}};
- const auto *allocator{[&]() {
- // Can't use FindClause in Enter (because clauses haven't been visited
- // yet).
- for (const parser::OmpClause &c : beginSpec.Clauses().v) {
- if (c.Id() == llvm::omp::Clause::OMPC_allocator) {
- return &c;
- }
- }
- return static_cast<const parser::OmpClause *>(nullptr);
- }()};
+ const auto *allocator{
+ parser::omp::FindClause(beginSpec, llvm::omp::Clause::OMPC_allocator)};
if (InTargetRegion()) {
bool hasDynAllocators{
@@ -4674,11 +4666,9 @@ void OmpStructureChecker::CheckDoacross(const parser::OmpDoacross &doa) {
const parser::OmpDirectiveSpecification &beginSpec{(*loopc)->BeginDir()};
llvm::omp::Directive loopDir{beginSpec.DirId()};
if (loopDir == llvm::omp::OMPD_do || loopDir == llvm::omp::OMPD_simd) {
- auto IsOrdered{[](const parser::OmpClause &c) {
- return c.Id() == llvm::omp::OMPC_ordered;
- }};
// If it has ORDERED clause, stop the traversal.
- if (llvm::any_of(beginSpec.Clauses().v, IsOrdered)) {
+ if (parser::omp::FindClause(
+ beginSpec, llvm::omp::Clause::OMPC_ordered)) {
break;
}
}
diff --git a/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir b/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir
index 6ecccde39d3f..fa9f9c429fa0 100644
--- a/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir
+++ b/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir
@@ -253,3 +253,121 @@ func.func @test_accbounds_rematerialize_fir() {
// CHECK: acc.bounds
// CHECK: acc.serial {
// CHECK: acc.bounds
+
+// -----
+
+// Test fir.convert rematerialization (ViewLikeOpInterface +
+// OutlineRematerializationOpInterface).
+func.func private @use_i64(i64) -> ()
+
+func.func @test_convert_rematerialize(%arg0: !fir.ref<i32>) {
+ %0 = fir.convert %arg0 : (!fir.ref<i32>) -> i64
+ fir.call @use_i64(%0) : (i64) -> ()
+ acc.parallel {
+ fir.call @use_i64(%0) : (i64) -> ()
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: @test_convert_rematerialize
+// CHECK: %[[CVT_OUTER:.*]] = fir.convert
+// CHECK: fir.call @use_i64(%[[CVT_OUTER]])
+// CHECK: acc.parallel {
+// CHECK: %[[CVT_INNER:.*]] = fir.convert
+// CHECK: fir.call @use_i64(%[[CVT_INNER]])
+
+// -----
+
+// Test fir.convert sinking (only used inside region).
+func.func private @use_i64(i64) -> ()
+
+func.func @test_convert_sink(%arg0: !fir.ref<i32>) {
+ %0 = fir.convert %arg0 : (!fir.ref<i32>) -> i64
+ acc.parallel {
+ fir.call @use_i64(%0) : (i64) -> ()
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: @test_convert_sink
+// CHECK: acc.parallel {
+// CHECK: %[[CVT:.*]] = fir.convert
+// CHECK: fir.call @use_i64(%[[CVT]])
+
+// -----
+
+// Test fir.convert sinking when input is fir.alloca (not a block argument).
+func.func private @use_i64(i64) -> ()
+
+func.func @test_convert_alloca_sink() {
+ %0 = fir.alloca i32
+ %1 = fir.convert %0 : (!fir.ref<i32>) -> i64
+ acc.parallel {
+ fir.call @use_i64(%1) : (i64) -> ()
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: @test_convert_alloca_sink
+// CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+// CHECK: acc.parallel {
+// CHECK: %[[CVT:.*]] = fir.convert %[[ALLOCA]]
+// CHECK: fir.call @use_i64(%[[CVT]])
+
+// -----
+
+// Test fir.convert rematerialization when input is fir.alloca.
+func.func private @use_i64(i64) -> ()
+
+func.func @test_convert_alloca_remat() {
+ %0 = fir.alloca i32
+ %1 = fir.convert %0 : (!fir.ref<i32>) -> i64
+ fir.call @use_i64(%1) : (i64) -> ()
+ acc.parallel {
+ fir.call @use_i64(%1) : (i64) -> ()
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: @test_convert_alloca_remat
+// CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+// CHECK: %[[CVT_OUTER:.*]] = fir.convert %[[ALLOCA]]
+// CHECK: fir.call @use_i64(%[[CVT_OUTER]])
+// CHECK: acc.parallel {
+// CHECK: %[[CVT_INNER:.*]] = fir.convert %[[ALLOCA]]
+// CHECK: fir.call @use_i64(%[[CVT_INNER]])
+
+// -----
+
+// Test that an intermediate fir.convert in a trace chain
+// (declare -> convert -> unboxchar) does not get rematerialized,
+// while a direct ptr-to-int fir.convert is correctly sunk.
+func.func private @use_i64(i64) -> ()
+func.func private @use_ref(!fir.ref<!fir.char<1,10>>) -> ()
+
+func.func @test_convert_chain_and_direct(%arg0: !fir.boxchar<1>, %arg1: !fir.ref<i32>) {
+ %c10 = arith.constant 10 : index
+ %0:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ %1 = fir.convert %0#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,10>>
+ %2 = fir.declare %1 typeparams %c10 {uniq_name = "scalar"} : (!fir.ref<!fir.char<1,10>>, index) -> !fir.ref<!fir.char<1,10>>
+ %3 = fir.convert %arg1 : (!fir.ref<i32>) -> i64
+ acc.parallel {
+ fir.call @use_ref(%2) : (!fir.ref<!fir.char<1,10>>) -> ()
+ fir.call @use_i64(%3) : (i64) -> ()
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: @test_convert_chain_and_direct
+// CHECK: %[[UNBOX:.*]]:2 = fir.unboxchar %arg0
+// CHECK: %[[CVT_REF:.*]] = fir.convert %[[UNBOX]]#0
+// CHECK: %[[DECL:.*]] = fir.declare %[[CVT_REF]]
+// CHECK: acc.parallel {
+// CHECK: %[[CVT_INT:.*]] = fir.convert %arg1
+// CHECK: fir.call @use_ref(%[[DECL]])
+// CHECK: fir.call @use_i64(%[[CVT_INT]])
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index 36874d3c21cd..d2da6ef62d7e 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -767,3 +767,19 @@ subroutine test_cache_temp_in_designator(data, a)
! CHECK: hlfir.designate %[[DECL]]#0
! CHECK: acc.yield
end subroutine
+
+
+subroutine full_array_cache()
+ integer :: k, j , kd, jd,y, x
+ real(8) :: tile(0:8,0:8)
+
+ !$acc parallel loop gang collapse(2)
+ do k = 1, kd
+ do j = 1, jd
+ !$acc cache(tile(:,:))
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPfull_array_cache()
+! CHECK: acc.cache var(%{{.*}}) bounds(%{{.*}})
diff --git a/flang/test/Semantics/OpenACC/acc-cache-validity.f90 b/flang/test/Semantics/OpenACC/acc-cache-validity.f90
index de66043de1ec..aaffd4ec30a9 100644
--- a/flang/test/Semantics/OpenACC/acc-cache-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-cache-validity.f90
@@ -38,11 +38,9 @@ program openacc_cache_validity
!ERROR: Only array element or subarray are allowed in CACHE directive
!$acc cache(/i/)
- !ERROR: The CACHE directive requires at least one of the bounds in the array section subscript triplet to be specified
- !$acc cache(a(:))
+ !$acc cache(a(:)) ! ok
- !ERROR: The CACHE directive requires at least one of the bounds in the array section subscript triplet to be specified
- !$acc cache(aa(:,:))
+ !$acc cache(aa(:,:)) ! ok
!ERROR: The CACHE directive does not support strided array sections
!$acc cache(a(1:10:2))
diff --git a/flang/test/Semantics/namelist02.f90 b/flang/test/Semantics/namelist02.f90
new file mode 100644
index 000000000000..efe1f0204abd
--- /dev/null
+++ b/flang/test/Semantics/namelist02.f90
@@ -0,0 +1,29 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic -Werror
+
+module m
+ implicit none
+ integer, parameter :: mc = 42
+end module
+
+! Local named constant
+program p
+ use m
+ implicit none
+ integer, parameter :: k = 3
+ !PORTABILITY: A namelist group object 'k' should not be a PARAMETER [-Wnamelist-parameter]
+ namelist /g/ k
+ ! USE-associated named constant
+ !PORTABILITY: A namelist group object 'mc' should not be a PARAMETER [-Wnamelist-parameter]
+ namelist /g2/ mc
+end program
+
+! Host-associated named constant
+subroutine host
+ implicit none
+ integer, parameter :: hc = 10
+ contains
+ subroutine inner
+ !PORTABILITY: A namelist group object 'hc' should not be a PARAMETER [-Wnamelist-parameter]
+ namelist /g3/ hc
+ end subroutine
+end subroutine
diff --git a/libc/shared/math.h b/libc/shared/math.h
index a7d735ffa174..ede0ebd5371a 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -124,6 +124,7 @@
#include "math/fdimf16.h"
#include "math/fdiml.h"
#include "math/ffma.h"
+#include "math/ffmaf128.h"
#include "math/ffmal.h"
#include "math/floor.h"
#include "math/floorbf16.h"
diff --git a/libc/shared/math/ffmaf128.h b/libc/shared/math/ffmaf128.h
new file mode 100644
index 000000000000..b22e6a1f08d3
--- /dev/null
+++ b/libc/shared/math/ffmaf128.h
@@ -0,0 +1,29 @@
+//===-- Shared ffmaf128 function --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_FFMAF128_H
+#define LLVM_LIBC_SHARED_MATH_FFMAF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/ffmaf128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::ffmaf128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_FFMAF128_H
diff --git a/libc/src/__support/FPUtil/generic/add_sub.h b/libc/src/__support/FPUtil/generic/add_sub.h
index 9f3ecff0eb23..4ef9ce06ebf9 100644
--- a/libc/src/__support/FPUtil/generic/add_sub.h
+++ b/libc/src/__support/FPUtil/generic/add_sub.h
@@ -96,6 +96,8 @@ add_or_sub(InType x, InType y) {
if (x_bits.is_zero()) {
if (y_bits.is_zero()) {
+ if (is_effectively_add)
+ return OutFPBits::zero(x_bits.sign()).get_val();
switch (quick_get_round()) {
case FE_DOWNWARD:
return OutFPBits::zero(Sign::NEG).get_val();
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 79278b6e77a3..0df8262cfb5f 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -1164,6 +1164,17 @@ add_header_library(
)
add_header_library(
+ ffmaf128
+ HDRS
+ ffmaf128.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.fma
+ libc.src.__support.macros.config
+ libc.include.llvm-libc-types.float128
+)
+
+add_header_library(
ffmal
HDRS
ffmal.h
diff --git a/libc/src/__support/math/asinpif.h b/libc/src/__support/math/asinpif.h
index 9a5daf6198a4..79d3ebbe63b5 100644
--- a/libc/src/__support/math/asinpif.h
+++ b/libc/src/__support/math/asinpif.h
@@ -23,22 +23,6 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
LIBC_INLINE float asinpif(float x) {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- constexpr size_t N_EXCEPTS = 5;
- constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINPIF_EXCEPTS = {
- {// (inputs, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.e768f6p-122, asinpif(x) = 0x1.364b7ap-123 (RZ)
- {0x02F3B47B, 0x021B25BD, 1, 0, 0},
- // x = 0x1.e768f6p-24, asinpif(x) = 0x1.364b7ap-25 (RZ)
- {0x33F3B47B, 0x331B25BD, 1, 0, 1},
- // x = 0x1.dddb4ep-19, asinpif(x) = 0x1.303686p-20 (RZ)
- {0x366EEDA7, 0x35981B43, 1, 0, 1},
- // x = -0x1.dddb4ep-19, asinpif(x) = -0x1.303686p-20 (RZ)
- {0xB66EEDA7, 0xB5981B43, 0, 1, 1},
- // x = -0x1.e768f6p-24, asinpif(x) = -0x1.364b7ap-25 (RZ)
- {0xB3F3B47B, 0xB31B25BD, 0, 1, 1}}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
using FPBits = fputil::FPBits<float>;
FPBits xbits(x);
@@ -61,12 +45,6 @@ LIBC_INLINE float asinpif(float x) {
return FPBits::quiet_nan().get_val();
}
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- auto r = ASINPIF_EXCEPTS.lookup(xbits.uintval());
- if (LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
// if |x| <= 0.5:
// asinpi(x) = x * (c0 + x^2 * P1(x^2))
if (LIBC_UNLIKELY(x_abs <= 0.5)) {
diff --git a/libc/src/__support/math/ffmaf128.h b/libc/src/__support/math/ffmaf128.h
new file mode 100644
index 000000000000..c4b5a58e145f
--- /dev/null
+++ b/libc/src/__support/math/ffmaf128.h
@@ -0,0 +1,34 @@
+//===-- Implementation header for ffmaf128 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_FFMAF128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_FFMAF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE float ffmaf128(float128 x, float128 y, float128 z) {
+ return fputil::fma<float>(x, y, z);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_FFMAF128_H
diff --git a/libc/src/__support/math/inv_trigf_utils.h b/libc/src/__support/math/inv_trigf_utils.h
index 7a93831333db..54a94f572cda 100644
--- a/libc/src/__support/math/inv_trigf_utils.h
+++ b/libc/src/__support/math/inv_trigf_utils.h
@@ -184,14 +184,14 @@ LIBC_INLINE double asin_eval(double xsq) {
// > prec = 200;
// > display = hexadecimal;
// > g = asin(x) / (pi * x);
-// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
+// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|],
// > [|D...|], [0, 0.5]);
// > for i from 0 to degree(P) do coeff(P, i);
// > print("Error:", dirtyinfnorm(P - g, [1e-30; 0.25]));
-// Error: 0x1.45c281e1cf9b58p-50 ~= 2^−49.652
+// Error: 0x1.a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778acp-54 ~= 2^-53.28
//
// Non-zero coefficients (even powers only):
-LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[13] = {
+LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[12] = {
0x1.45f306dc9c881p-2, // x^0
0x1.b2995e7b7e756p-5, // x^2
0x1.8723a1d12f828p-6, // x^4
@@ -206,12 +206,40 @@ LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[13] = {
0x1.4b50c2eb13708p-7 // x^22
};
-// Evaluates P1(v2) = c1 + c2*v2 + c3*v2^2 + ... (tail of P without c0)
+// Evaluates P1(v2) = c1 + c2*v2 + c3*v2^2 + ... + c11*v2^10 (tail of P
+// without c0) using Estrin's scheme for instruction-level parallelism.
+//
+// The tail polynomial has 11 coefficients ASINPI_COEFFS[1..11] in powers of
+// v2:
+// P1(v2) = c1 + c2*v2 + c3*v2^2 + c4*v2^3 + ... + c11*v2^10
+//
+// Estrin pairs them bottom-up:
+// Level 0 (6 pairs, using v2):
+// p0 = c1 + c2*v2 p1 = c3 + c4*v2
+// p2 = c5 + c6*v2 p3 = c7 + c8*v2
+// p4 = c9 + c10*v2 p5 = c11
+// Level 1 (3 pairs, using v4):
+// q0 = p0 + p1*v4 q1 = p2 + p3*v4
+// q2 = p4 + p5*v4
+// Level 2 (using v8):
+// r0 = q0 + q1*v8 r1 = q2
+// result = q0 + q1*v8 + q2*v16
LIBC_INLINE double asinpi_eval(double v2) {
- return fputil::polyeval(
- v2, ASINPI_COEFFS[1], ASINPI_COEFFS[2], ASINPI_COEFFS[3],
- ASINPI_COEFFS[4], ASINPI_COEFFS[5], ASINPI_COEFFS[6], ASINPI_COEFFS[7],
- ASINPI_COEFFS[8], ASINPI_COEFFS[9], ASINPI_COEFFS[10], ASINPI_COEFFS[11]);
+ double v4 = v2 * v2;
+ double v8 = v4 * v4;
+
+ double p0 = fputil::multiply_add(v2, ASINPI_COEFFS[2], ASINPI_COEFFS[1]);
+ double p1 = fputil::multiply_add(v2, ASINPI_COEFFS[4], ASINPI_COEFFS[3]);
+ double p2 = fputil::multiply_add(v2, ASINPI_COEFFS[6], ASINPI_COEFFS[5]);
+ double p3 = fputil::multiply_add(v2, ASINPI_COEFFS[8], ASINPI_COEFFS[7]);
+ double p4 = fputil::multiply_add(v2, ASINPI_COEFFS[10], ASINPI_COEFFS[9]);
+ double p5 = ASINPI_COEFFS[11];
+
+ double q0 = fputil::multiply_add(v4, p1, p0);
+ double q1 = fputil::multiply_add(v4, p3, p2);
+ double q2 = fputil::multiply_add(v4, p5, p4);
+
+ return fputil::polyeval(v8, q0, q1, q2);
}
} // namespace inv_trigf_utils_internal
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f8ec25be61d1..418cf85b84a2 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3155,8 +3155,7 @@ add_entrypoint_object(
HDRS
../ffmaf128.h
DEPENDS
- libc.src.__support.macros.properties.types
- libc.src.__support.FPUtil.fma
+ libc.src.__support.math.ffmaf128
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/ffmaf128.cpp b/libc/src/math/generic/ffmaf128.cpp
index 55da93020faf..15c0308b0b9c 100644
--- a/libc/src/math/generic/ffmaf128.cpp
+++ b/libc/src/math/generic/ffmaf128.cpp
@@ -7,14 +7,12 @@
//===----------------------------------------------------------------------===//
#include "src/math/ffmaf128.h"
-#include "src/__support/FPUtil/FMA.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/math/ffmaf128.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, ffmaf128, (float128 x, float128 y, float128 z)) {
- return fputil::fma<float>(x, y, z);
+ return math::ffmaf128(x, y, z);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index a04a15cdabcb..c90e5687d8c3 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -121,6 +121,7 @@ add_fp_unittest(
libc.src.__support.math.fdimf16
libc.src.__support.math.fdiml
libc.src.__support.math.ffma
+ libc.src.__support.math.ffmaf128
libc.src.__support.math.ffmal
libc.src.__support.math.floor
libc.src.__support.math.floorbf16
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 460449e4fcb2..17045ce5edfd 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -339,6 +339,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat128) {
EXPECT_FP_EQ(float128(0x0p+0),
LIBC_NAMESPACE::shared::atan2f128(float128(0.0), float128(0.0)));
+ EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::ffmaf128(
+ float128(0.0), float128(0.0), float128(0.0)));
EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::fsqrtf128(float128(1.0f)));
EXPECT_FP_EQ_ALL_ROUNDING(float128(0.75), LIBC_NAMESPACE::shared::frexpf128(
float128(24), &exponent));
diff --git a/libc/test/src/math/smoke/AddTest.h b/libc/test/src/math/smoke/AddTest.h
index d0a9dcb9c283..511ea581d523 100644
--- a/libc/test/src/math/smoke/AddTest.h
+++ b/libc/test/src/math/smoke/AddTest.h
@@ -165,6 +165,15 @@ public:
EXPECT_FP_EQ(OutType(-1.0), func(InType(-2.0), InType(1.0)));
EXPECT_FP_EQ(OutType(-3.0), func(InType(-2.0), InType(-1.0)));
}
+
+ void test_signed_zero_result(AddFunc func) {
+ EXPECT_FP_EQ_ALL_ROUNDING(zero, func(in.zero, in.zero));
+ EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, func(in.neg_zero, in.neg_zero));
+ EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero,
+ func(in.neg_zero, in.zero));
+ EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero,
+ func(in.zero, in.neg_zero));
+ }
};
#define LIST_ADD_TESTS(OutType, InType, func) \
@@ -176,7 +185,8 @@ public:
TEST_F(LlvmLibcAddTest, RangeErrors) { test_range_errors(&func); } \
TEST_F(LlvmLibcAddTest, InexactResults) { test_inexact_results(&func); } \
TEST_F(LlvmLibcAddTest, MixedNormality) { test_mixed_normality(&func); } \
- TEST_F(LlvmLibcAddTest, MixedSigns) { test_mixed_signs(&func); }
+ TEST_F(LlvmLibcAddTest, MixedSigns) { test_mixed_signs(&func); } \
+ TEST_F(LlvmLibcAddTest, SignedZeroResult) { test_signed_zero_result(&func); }
#define LIST_ADD_SAME_TYPE_TESTS(suffix, OutType, InType, func) \
using LlvmLibcAddTest##suffix = AddTest<OutType, InType>; \
@@ -193,6 +203,9 @@ public:
TEST_F(LlvmLibcAddTest##suffix, MixedNormality) { \
test_mixed_normality(&func); \
} \
- TEST_F(LlvmLibcAddTest##suffix, MixedSigns) { test_mixed_signs(&func); }
+ TEST_F(LlvmLibcAddTest##suffix, MixedSigns) { test_mixed_signs(&func); } \
+ TEST_F(LlvmLibcAddTest##suffix, SignedZeroResult) { \
+ test_signed_zero_result(&func); \
+ }
#endif // LLVM_LIBC_TEST_SRC_MATH_SMOKE_ADDTEST_H
diff --git a/libc/test/src/math/smoke/SubTest.h b/libc/test/src/math/smoke/SubTest.h
index 79086aa0bfb7..bc2af7ee08b0 100644
--- a/libc/test/src/math/smoke/SubTest.h
+++ b/libc/test/src/math/smoke/SubTest.h
@@ -156,6 +156,15 @@ public:
EXPECT_FP_EQ(OutType(-3.0), func(InType(-2.0), InType(1.0)));
EXPECT_FP_EQ(OutType(-1.0), func(InType(-2.0), InType(-1.0)));
}
+
+ void test_signed_zero_result(SubFunc func) {
+ EXPECT_FP_EQ_ALL_ROUNDING(zero, func(in.zero, in.neg_zero));
+ EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, func(in.neg_zero, in.zero));
+ EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero,
+ func(in.zero, in.zero));
+ EXPECT_FP_EQ_ALL_ROUNDING(zero, zero, neg_zero, zero,
+ func(in.neg_zero, in.neg_zero));
+ }
};
#define LIST_SUB_TESTS(OutType, InType, func) \
@@ -166,7 +175,8 @@ public:
} \
TEST_F(LlvmLibcSubTest, RangeErrors) { test_range_errors(&func); } \
TEST_F(LlvmLibcSubTest, InexactResults) { test_inexact_results(&func); } \
- TEST_F(LlvmLibcSubTest, MixedSigns) { test_mixed_signs(&func); }
+ TEST_F(LlvmLibcSubTest, MixedSigns) { test_mixed_signs(&func); } \
+ TEST_F(LlvmLibcSubTest, SignedZeroResult) { test_signed_zero_result(&func); }
#define LIST_SUB_SAME_TYPE_TESTS(suffix, OutType, InType, func) \
using LlvmLibcSubTest##suffix = SubTest<OutType, InType>; \
@@ -180,6 +190,9 @@ public:
TEST_F(LlvmLibcSubTest##suffix, InexactResults) { \
test_inexact_results(&func); \
} \
- TEST_F(LlvmLibcSubTest##suffix, MixedSigns) { test_mixed_signs(&func); }
+ TEST_F(LlvmLibcSubTest##suffix, MixedSigns) { test_mixed_signs(&func); } \
+ TEST_F(LlvmLibcSubTest##suffix, SignedZeroResult) { \
+ test_signed_zero_result(&func); \
+ }
#endif // LLVM_LIBC_TEST_SRC_MATH_SMOKE_SUBTEST_H
diff --git a/libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h b/libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h
new file mode 100644
index 000000000000..3a1137ef3998
--- /dev/null
+++ b/libclc/clc/include/clc/workitem/clc_get_enqueued_local_size.h
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_WORKITEM_CLC_GET_ENQUEUED_LOCAL_SIZE_H__
+#define __CLC_WORKITEM_CLC_GET_ENQUEUED_LOCAL_SIZE_H__
+
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_CONST _CLC_DECL size_t
+__clc_get_enqueued_local_size(uint dim);
+
+#endif // __CLC_WORKITEM_CLC_GET_ENQUEUED_LOCAL_SIZE_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index b4557b0a26f7..7006f538d927 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -2,6 +2,7 @@ address_space/qualifier.cl
math/clc_ldexp.cl
mem_fence/clc_mem_fence.cl
synchronization/clc_work_group_barrier.cl
+workitem/clc_get_enqueued_local_size.cl
workitem/clc_get_global_offset.cl
workitem/clc_get_global_size.cl
workitem/clc_get_group_id.cl
diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl
new file mode 100644
index 000000000000..c7226241694b
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_enqueued_local_size.h"
+#include "clc/workitem/clc_get_local_size.h"
+
+_CLC_OVERLOAD _CLC_DEF size_t __clc_get_enqueued_local_size(uint dim) {
+ return __clc_get_local_size(dim);
+}
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
index c10cdd2d02ef..f21a060849db 100644
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
+++ b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
@@ -6,11 +6,15 @@
//
//===----------------------------------------------------------------------===//
+#include <amdhsa_abi.h>
#include <clc/opencl/opencl-base.h>
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
- __constant uint *ptr = (__constant uint *)__builtin_amdgcn_dispatch_ptr();
- if (dim < 3)
- return ptr[3 + dim];
- return 1;
+ if (dim > 2)
+ return 1;
+ __constant amdhsa_implicit_kernarg_v5 *args =
+ (__constant amdhsa_implicit_kernarg_v5 *)
+ __builtin_amdgcn_implicitarg_ptr();
+ return args->block_count[dim] * (uint)args->group_size[dim] +
+ (uint)args->remainder[dim];
}
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
index a95c58ca1853..ed1e17776361 100644
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
+++ b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
@@ -6,17 +6,21 @@
//
//===----------------------------------------------------------------------===//
+#include <amdhsa_abi.h>
#include <clc/opencl/opencl-base.h>
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
- __constant uint *ptr = (__constant uint *)__builtin_amdgcn_dispatch_ptr();
- switch (dim) {
- case 0:
- return ptr[1] & 0xffffu;
- case 1:
- return ptr[1] >> 16;
- case 2:
- return ptr[2] & 0xffffu;
- }
- return 1;
+ if (dim > 2)
+ return 1;
+
+ __constant amdhsa_implicit_kernarg_v5 *args =
+ (__constant amdhsa_implicit_kernarg_v5 *)
+ __builtin_amdgcn_implicitarg_ptr();
+
+ uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(),
+ __builtin_amdgcn_workgroup_id_y(),
+ __builtin_amdgcn_workgroup_id_z()};
+
+ return group_ids[dim] < args->block_count[dim] ? (size_t)args->group_size[dim]
+ : (size_t)args->remainder[dim];
}
diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 0522e13f5d3d..84fc4a6650c3 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -1,5 +1,4 @@
mem_fence/fence.cl
-synchronization/barrier.cl
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.cl
diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
deleted file mode 100644
index 9f67b6ebcb6d..000000000000
--- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/opencl/synchronization/utils.h>
-#include <clc/synchronization/clc_work_group_barrier.h>
-
-_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
- int memory_scope = __opencl_get_memory_scope(flags);
- int memory_order = __ATOMIC_SEQ_CST;
- __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
- __clc_work_group_barrier(memory_scope, memory_order, memory_semantics);
-}
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
index bb5e8ab08a71..be94a34e9af0 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -199,5 +199,7 @@ shared/max.cl
shared/min.cl
shared/vload.cl
shared/vstore.cl
+synchronization/work_group_barrier.cl
+workitem/get_enqueued_local_size.cl
workitem/get_global_id.cl
workitem/get_global_size.cl
diff --git a/libclc/opencl/lib/generic/async/wait_group_events.cl b/libclc/opencl/lib/generic/async/wait_group_events.cl
index 0881a74bd904..76a9ee38bb89 100644
--- a/libclc/opencl/lib/generic/async/wait_group_events.cl
+++ b/libclc/opencl/lib/generic/async/wait_group_events.cl
@@ -12,5 +12,5 @@ _CLC_DEF _CLC_OVERLOAD void wait_group_events(int num_events,
event_t *event_list) {
(void)num_events;
(void)event_list;
- barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
+ work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
}
diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
new file mode 100644
index 000000000000..14de313c4f58
--- /dev/null
+++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/opencl/synchronization/utils.h"
+#include "clc/opencl/utils.h"
+#include "clc/synchronization/clc_work_group_barrier.h"
+
+_CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags,
+ memory_scope scope) {
+ int memory_order = __ATOMIC_SEQ_CST;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order,
+ memory_semantics);
+}
+
+_CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags) {
+ work_group_barrier(flags, memory_scope_work_group);
+}
+
+_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
+ work_group_barrier(flags);
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl b/libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl
new file mode 100644
index 000000000000..416a3e9837cd
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_enqueued_local_size.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/opencl/opencl-base.h>
+#include <clc/workitem/clc_get_enqueued_local_size.h>
+
+_CLC_DEF _CLC_OVERLOAD size_t get_enqueued_local_size(uint dim) {
+ return __clc_get_enqueued_local_size(dim);
+}
diff --git a/libclc/opencl/lib/ptx-nvidiacl/SOURCES b/libclc/opencl/lib/ptx-nvidiacl/SOURCES
index eb28570a617a..eb64360fece7 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/opencl/lib/ptx-nvidiacl/SOURCES
@@ -1,5 +1,4 @@
mem_fence/fence.cl
-synchronization/barrier.cl
workitem/get_global_id.cl
workitem/get_group_id.cl
workitem/get_local_id.cl
diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
deleted file mode 100644
index 9f67b6ebcb6d..000000000000
--- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/opencl/synchronization/utils.h>
-#include <clc/synchronization/clc_work_group_barrier.h>
-
-_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
- int memory_scope = __opencl_get_memory_scope(flags);
- int memory_order = __ATOMIC_SEQ_CST;
- __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
- __clc_work_group_barrier(memory_scope, memory_order, memory_semantics);
-}
diff --git a/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp
index 19342168a72f..4ed74027ed8f 100644
--- a/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp
+++ b/libcxx/test/libcxx/strings/basic.string/string.cons/constexpr_initialization_stress.pass.cpp
@@ -14,9 +14,6 @@
// Stress test for constexpr std::string initialization.
// This test ensures that we can handle a large number of constexpr strings.
-// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=1000000
-// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=1000000
-
#include <string>
#include <array>
#include <cassert>
diff --git a/lldb/docs/use/variable.rst b/lldb/docs/use/variable.rst
index 73df62dcd1b7..6a258dfcd342 100644
--- a/lldb/docs/use/variable.rst
+++ b/lldb/docs/use/variable.rst
@@ -958,7 +958,8 @@ be implemented by the Python class):
def get_child_index(self, name: str) -> int:
"""
This call should return the index of the synthetic child whose name is
- given as the argument.
+ given as the argument. Array subscripting, names in the form "[N]", is
+ automatically supported.
Return -1 if there is no child at the index.
"""
diff --git a/lldb/include/lldb/DataFormatters/TypeSynthetic.h b/lldb/include/lldb/DataFormatters/TypeSynthetic.h
index fbf1d060a92b..d1e6efb79303 100644
--- a/lldb/include/lldb/DataFormatters/TypeSynthetic.h
+++ b/lldb/include/lldb/DataFormatters/TypeSynthetic.h
@@ -47,7 +47,14 @@ public:
virtual lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) = 0;
- virtual llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) = 0;
+ /// Determine the index of a named child. Subscript names ("[N]") are, by
+ /// default, handled automatically. For data types which need custom
+ /// subscripting behavior (for example, a sparse array), disable automatic
+ /// subscripting with TypeOptions::eTypeOptionCustomSubscripting.
+ virtual llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) {
+ return llvm::createStringError("Type has no child named '%s'",
+ name.AsCString());
+ }
/// This function is assumed to always succeed and if it fails, the front-end
/// should know to deal with it in the correct way (most probably, by refusing
@@ -223,6 +230,18 @@ public:
return *this;
}
+ bool GetCustomSubscripting() const {
+ return m_flags & lldb::eTypeOptionCustomSubscripting;
+ }
+
+ Flags &SetCustomSubscripting(bool value = true) {
+ if (value)
+ m_flags |= lldb::eTypeOptionCustomSubscripting;
+ else
+ m_flags &= ~lldb::eTypeOptionCustomSubscripting;
+ return *this;
+ }
+
uint32_t GetValue() { return m_flags; }
void SetValue(uint32_t value) { m_flags = value; }
@@ -245,6 +264,8 @@ public:
bool WantsDereference() const { return m_flags.GetFrontEndWantsDereference();}
+ bool CustomSubscripting() const { return m_flags.GetCustomSubscripting(); }
+
void SetCascades(bool value) { m_flags.SetCascades(value); }
void SetSkipsPointers(bool value) { m_flags.SetSkipPointers(value); }
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index 7ebcb2214e0e..d2600d0a6ce4 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -926,7 +926,8 @@ FLAGS_ENUM(TypeOptions){eTypeOptionNone = (0u),
eTypeOptionHideNames = (1u << 6),
eTypeOptionNonCacheable = (1u << 7),
eTypeOptionHideEmptyAggregates = (1u << 8),
- eTypeOptionFrontEndWantsDereference = (1u << 9)};
+ eTypeOptionFrontEndWantsDereference = (1u << 9),
+ eTypeOptionCustomSubscripting = (1u << 10)};
/// This is the return value for frame comparisons. If you are comparing frame
/// A to frame B the following cases arise:
diff --git a/lldb/source/DataFormatters/VectorType.cpp b/lldb/source/DataFormatters/VectorType.cpp
index c2355fbfdcb2..624f9de312bb 100644
--- a/lldb/source/DataFormatters/VectorType.cpp
+++ b/lldb/source/DataFormatters/VectorType.cpp
@@ -271,19 +271,6 @@ public:
return lldb::ChildCacheState::eRefetch;
}
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
- }
-
private:
lldb::Format m_parent_format = eFormatInvalid;
lldb::Format m_item_format = eFormatInvalid;
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index f89b0fe6b6db..4e9b1389a11a 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -1561,7 +1561,8 @@ void DynamicLoaderDarwinKernel::PrivateInitialize(Process *process) {
}
void DynamicLoaderDarwinKernel::SetNotificationBreakpointIfNeeded() {
- if (m_break_id == LLDB_INVALID_BREAK_ID && m_kernel.GetModule()) {
+ if (m_break_id == LLDB_INVALID_BREAK_ID && m_kernel.GetModule() &&
+ m_process->IsLiveDebugSession()) {
DEBUG_PRINTF("DynamicLoaderDarwinKernel::%s() process state = %s\n",
__FUNCTION__, StateAsCString(m_process->GetState()));
diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
index f2521ec75087..1d522f0b4a72 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
@@ -28,15 +28,6 @@ public:
GenericBitsetFrontEnd(ValueObject &valobj, StdLib stdlib);
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
- }
-
lldb::ChildCacheState Update() override;
llvm::Expected<uint32_t> CalculateNumChildren() override {
return m_elements.size();
diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp
index b6ff4477a890..841d3b2220df 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/GenericList.cpp
@@ -124,14 +124,6 @@ private:
template <StlType Stl>
class AbstractListFrontEnd : public SyntheticChildrenFrontEnd {
public:
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
- }
lldb::ChildCacheState Update() override;
protected:
diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp
index 7fc6eb55d4e3..e4c261a5411f 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp
@@ -41,12 +41,8 @@ public:
llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
if (name == "$$dereference$$")
return 0;
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
+ return llvm::createStringError("Type has no child named '%s'",
+ name.AsCString());
}
llvm::Expected<uint32_t> CalculateNumChildren() override {
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp
index 9061be2e4014..c40223f5845a 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp
@@ -197,8 +197,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
llvm::Expected<uint32_t>
CalculateNumChildrenForOldCompressedPairLayout(ValueObject &pair);
@@ -390,16 +388,6 @@ lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::Update() {
return lldb::ChildCacheState::eRefetch;
}
-llvm::Expected<size_t> lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::
- GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
-}
-
SyntheticChildrenFrontEnd *
lldb_private::formatters::LibcxxStdMapSyntheticFrontEndCreator(
CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) {
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp
index ebc6d92aabe0..3e4093509b6b 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp
@@ -20,15 +20,6 @@ public:
Update();
}
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
- }
-
lldb::ChildCacheState Update() override;
llvm::Expected<uint32_t> CalculateNumChildren() override {
return m_elements.size();
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
index fd8411ba0e56..34eed108d850 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
@@ -40,8 +40,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
CompilerType GetNodeType();
CompilerType GetElementType(CompilerType table_type);
@@ -285,17 +283,6 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::Update() {
return lldb::ChildCacheState::eRefetch;
}
-llvm::Expected<size_t>
-lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::
- GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
-}
-
SyntheticChildrenFrontEnd *
lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEndCreator(
CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) {
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp
index 30fec4e2dde0..2855fc1e0512 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp
@@ -202,15 +202,6 @@ public:
Update();
}
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
- }
-
lldb::ChildCacheState Update() override;
llvm::Expected<uint32_t> CalculateNumChildren() override { return m_size; }
ValueObjectSP GetChildAtIndex(uint32_t idx) override;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp
index 076bbbb87448..7ef12f06d96d 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp
@@ -32,8 +32,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
// The lifetime of a ValueObject and all its derivative ValueObjects
// (children, clones, etc.) is managed by a ClusterManager. These
@@ -98,16 +96,6 @@ LibStdcppTupleSyntheticFrontEnd::CalculateNumChildren() {
return m_members.size();
}
-llvm::Expected<size_t>
-LibStdcppTupleSyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
-}
-
SyntheticChildrenFrontEnd *
lldb_private::formatters::LibStdcppTupleSyntheticFrontEndCreator(
CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) {
diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp
index 566f92c39b1d..3cd88eebc56b 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTree.cpp
@@ -187,8 +187,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
/// Returns the ValueObject for the _Tree_node at index \ref idx.
///
@@ -335,17 +333,6 @@ lldb_private::formatters::MsvcStlTreeSyntheticFrontEnd::Update() {
return lldb::ChildCacheState::eRefetch;
}
-llvm::Expected<size_t>
-lldb_private::formatters::MsvcStlTreeSyntheticFrontEnd::GetIndexOfChildWithName(
- ConstString name) {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
-}
-
lldb::ChildCacheState MsvcStlTreeIterSyntheticFrontEnd::Update() {
m_inner_sp = nullptr;
ValueObjectSP node_sp = m_backend.GetChildMemberWithName("_Ptr");
diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp
index fe20b4c141a6..fd133550e00b 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlTuple.cpp
@@ -20,15 +20,6 @@ public:
Update();
}
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
- }
-
lldb::ChildCacheState Update() override;
llvm::Expected<uint32_t> CalculateNumChildren() override {
return m_elements.size();
diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp
index 55e964256264..de6216d8a431 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlVariant.cpp
@@ -147,15 +147,6 @@ public:
Update();
}
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = formatters::ExtractIndexFromString(name.GetCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- return *optional_idx;
- }
-
lldb::ChildCacheState Update() override;
llvm::Expected<uint32_t> CalculateNumChildren() override { return m_size; }
ValueObjectSP GetChildAtIndex(uint32_t idx) override;
diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
index 25376e064879..b1dc9ff7e48b 100644
--- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
@@ -56,8 +56,6 @@ public:
lldb::ChildCacheState Update() override = 0;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
protected:
virtual lldb::addr_t GetDataAddress() = 0;
@@ -218,8 +216,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
ExecutionContextRef m_exe_ctx_ref;
uint8_t m_ptr_size = 8;
@@ -526,20 +522,6 @@ lldb_private::formatters::GenericNSArrayMSyntheticFrontEnd<D32, D64>::Update() {
: lldb::ChildCacheState::eRefetch;
}
-llvm::Expected<size_t> lldb_private::formatters::NSArrayMSyntheticFrontEndBase::
- GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
template <typename D32, typename D64>
lldb_private::formatters::GenericNSArrayMSyntheticFrontEnd<D32, D64>::
GenericNSArrayMSyntheticFrontEnd::~GenericNSArrayMSyntheticFrontEnd() {
@@ -616,22 +598,6 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd<D32, D64, Inline>::
}
template <typename D32, typename D64, bool Inline>
-llvm::Expected<size_t>
-lldb_private::formatters::GenericNSArrayISyntheticFrontEnd<
- D32, D64, Inline>::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
-template <typename D32, typename D64, bool Inline>
llvm::Expected<uint32_t>
lldb_private::formatters::GenericNSArrayISyntheticFrontEnd<
D32, D64, Inline>::CalculateNumChildren() {
diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
index 4ff8f36adff8..2d72e913192a 100644
--- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
@@ -109,8 +109,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct DataDescriptor_32 {
uint32_t _used : 26;
@@ -148,8 +146,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
ExecutionContextRef m_exe_ctx_ref;
CompilerType m_pair_type;
@@ -178,8 +174,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct DictionaryItemDescriptor {
lldb::addr_t key_ptr;
@@ -228,8 +222,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct DictionaryItemDescriptor {
lldb::addr_t key_ptr;
@@ -259,8 +251,6 @@ namespace Foundation1100 {
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct DataDescriptor_32 {
uint32_t _used : 26;
@@ -585,20 +575,6 @@ lldb_private::formatters::NSDictionaryISyntheticFrontEnd::
m_data_64 = nullptr;
}
-llvm::Expected<size_t> lldb_private::formatters::
- NSDictionaryISyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
llvm::Expected<uint32_t> lldb_private::formatters::
NSDictionaryISyntheticFrontEnd::CalculateNumChildren() {
if (!m_data_32 && !m_data_64)
@@ -723,20 +699,6 @@ lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::
: SyntheticChildrenFrontEnd(*valobj_sp), m_exe_ctx_ref(), m_hashtable(),
m_pair_type() {}
-llvm::Expected<size_t> lldb_private::formatters::
- NSCFDictionarySyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
llvm::Expected<uint32_t> lldb_private::formatters::
NSCFDictionarySyntheticFrontEnd::CalculateNumChildren() {
if (!m_hashtable.IsValid())
@@ -859,21 +821,6 @@ lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::
NSConstantDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp)
: SyntheticChildrenFrontEnd(*valobj_sp) {}
-llvm::Expected<size_t>
-lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::
- GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
llvm::Expected<uint32_t> lldb_private::formatters::
NSConstantDictionarySyntheticFrontEnd::CalculateNumChildren() {
return m_size;
@@ -1064,22 +1011,6 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<
}
template <typename D32, typename D64>
-llvm::Expected<size_t>
-lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<
- D32, D64>::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
-template <typename D32, typename D64>
llvm::Expected<uint32_t>
lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<
D32, D64>::CalculateNumChildren() {
@@ -1227,20 +1158,6 @@ lldb_private::formatters::Foundation1100::
m_data_64 = nullptr;
}
-llvm::Expected<size_t> lldb_private::formatters::Foundation1100::
- NSDictionaryMSyntheticFrontEnd::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
llvm::Expected<uint32_t> lldb_private::formatters::Foundation1100::
NSDictionaryMSyntheticFrontEnd::CalculateNumChildren() {
if (!m_data_32 && !m_data_64)
diff --git a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp
index b5360195e91d..23f711931f95 100644
--- a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp
@@ -126,19 +126,6 @@ public:
bool MightHaveChildren() override { return m_impl.m_mode != Mode::Invalid; }
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
- }
-
lldb::ValueObjectSP GetSyntheticValue() override { return nullptr; }
protected:
diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp
index 150b23350712..44af668759f9 100644
--- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp
@@ -52,8 +52,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct DataDescriptor_32 {
uint32_t _used : 26;
@@ -88,8 +86,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct SetItemDescriptor {
lldb::addr_t item_ptr;
@@ -119,8 +115,6 @@ public:
lldb::ChildCacheState Update() override;
- llvm::Expected<size_t> GetIndexOfChildWithName(ConstString name) override;
-
private:
struct SetItemDescriptor {
@@ -386,21 +380,6 @@ lldb_private::formatters::NSSetISyntheticFrontEnd::~NSSetISyntheticFrontEnd() {
m_data_64 = nullptr;
}
-llvm::Expected<size_t>
-lldb_private::formatters::NSSetISyntheticFrontEnd::GetIndexOfChildWithName(
- ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
llvm::Expected<uint32_t>
lldb_private::formatters::NSSetISyntheticFrontEnd::CalculateNumChildren() {
if (!m_data_32 && !m_data_64)
@@ -522,21 +501,6 @@ lldb_private::formatters::NSCFSetSyntheticFrontEnd::NSCFSetSyntheticFrontEnd(
: SyntheticChildrenFrontEnd(*valobj_sp), m_exe_ctx_ref(), m_hashtable(),
m_pair_type() {}
-llvm::Expected<size_t>
-lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetIndexOfChildWithName(
- ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
llvm::Expected<uint32_t>
lldb_private::formatters::NSCFSetSyntheticFrontEnd::CalculateNumChildren() {
if (!m_hashtable.IsValid())
@@ -662,21 +626,6 @@ lldb_private::formatters::GenericNSSetMSyntheticFrontEnd<D32, D64>::
}
template <typename D32, typename D64>
-llvm::Expected<size_t> lldb_private::formatters::GenericNSSetMSyntheticFrontEnd<
- D32, D64>::GetIndexOfChildWithName(ConstString name) {
- auto optional_idx = ExtractIndexFromString(name.AsCString());
- if (!optional_idx) {
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- }
- uint32_t idx = *optional_idx;
- if (idx >= CalculateNumChildrenIgnoringErrors())
- return llvm::createStringError("Type has no child named '%s'",
- name.AsCString());
- return idx;
-}
-
-template <typename D32, typename D64>
llvm::Expected<uint32_t>
lldb_private::formatters::GenericNSSetMSyntheticFrontEnd<
D32, D64>::CalculateNumChildren() {
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h
index 51a53c749dfe..cd519691eb5a 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86.h
@@ -33,12 +33,11 @@ namespace process_freebsd {
class NativeProcessFreeBSD;
-class NativeRegisterContextFreeBSD_x86_64
- : public NativeRegisterContextFreeBSD,
- public NativeRegisterContextDBReg_x86 {
+class NativeRegisterContextFreeBSD_x86 : public NativeRegisterContextFreeBSD,
+ public NativeRegisterContextDBReg_x86 {
public:
- NativeRegisterContextFreeBSD_x86_64(const ArchSpec &target_arch,
- NativeThreadFreeBSD &native_thread);
+ NativeRegisterContextFreeBSD_x86(const ArchSpec &target_arch,
+ NativeThreadFreeBSD &native_thread);
uint32_t GetRegisterSetCount() const override;
const RegisterSet *GetRegisterSet(uint32_t set_index) const override;
diff --git a/lldb/source/ValueObject/ValueObjectSynthetic.cpp b/lldb/source/ValueObject/ValueObjectSynthetic.cpp
index 44e53bd5fd82..b0e67df5541e 100644
--- a/lldb/source/ValueObject/ValueObjectSynthetic.cpp
+++ b/lldb/source/ValueObject/ValueObjectSynthetic.cpp
@@ -9,6 +9,7 @@
#include "lldb/ValueObject/ValueObjectSynthetic.h"
#include "lldb/Core/Value.h"
+#include "lldb/DataFormatters/FormattersHelpers.h"
#include "lldb/DataFormatters/TypeSynthetic.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Utility/ConstString.h"
@@ -18,6 +19,7 @@
#include "lldb/ValueObject/ValueObject.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Error.h"
#include <optional>
namespace lldb_private {
@@ -344,12 +346,30 @@ ValueObjectSynthetic::GetIndexOfChildWithName(llvm::StringRef name_ref) {
}
if (!found_index && m_synth_filter_up != nullptr) {
- auto index_or_err = m_synth_filter_up->GetIndexOfChildWithName(name);
- if (!index_or_err)
- return index_or_err.takeError();
+    size_t index = SIZE_MAX;
+    if (auto index_or_err = m_synth_filter_up->GetIndexOfChildWithName(name)) {
+      index = *index_or_err;
+    } else if (m_synth_sp->CustomSubscripting()) {
+      // Automatic subscripting is disabled here: propagate the lookup error.
+      return index_or_err.takeError();
+    } else {
+      // Provide automatic support for subscript child names ("[N]").
+      auto maybe_index = formatters::ExtractIndexFromString(name.GetCString());
+      if (!maybe_index)
+        // The child name was not of the form "[N]", return the original error.
+        return index_or_err.takeError();
+      // Subscripting succeeded, ignore the original error.
+      llvm::consumeError(index_or_err.takeError());
+      index = *maybe_index;
+      // Prevent unnecessary work by limiting max to one past the index.
+      uint32_t max = index + 1;
+      if (index >= GetNumChildrenIgnoringErrors(max))
+        return llvm::createStringError("Subscript index out of range: %zu",
+                                       index);
+    }
std::lock_guard<std::mutex> guard(m_child_mutex);
- m_name_toindex[name.GetCString()] = *index_or_err;
- return *index_or_err;
+ m_name_toindex[name.GetCString()] = index;
+ return index;
} else if (!found_index && m_synth_filter_up == nullptr) {
return llvm::createStringError("Type has no child named '%s'",
name.AsCString());
diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py
index 167899bd3907..7549128d9b64 100644
--- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py
+++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/myArraySynthProvider.py
@@ -25,7 +25,7 @@ class myArraySynthProvider:
if name == "[0]":
return 0
if name == "[1]":
- return
+ return 1
if name == "[2]":
return 2
if name == "[3]":
diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile
new file mode 100644
index 000000000000..c9319d6e6888
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/Makefile
@@ -0,0 +1,2 @@
+C_SOURCES := main.c
+include Makefile.rules
diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py
new file mode 100644
index 000000000000..be34b0f1f72d
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/TestSyntheticSubscript.py
@@ -0,0 +1,23 @@
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from lldbsuite.test import lldbutil
+
+
+class TestCase(TestBase):
+    def test(self):
+        self.build()
+        _, process, _, _ = lldbutil.run_to_source_breakpoint(
+            self, "break here", lldb.SBFileSpec("main.c")
+        )
+        self.runCmd("command script import thing_formatter.py")
+        x = process.selected_thread.selected_frame.var("x")
+        names = ("zero", "one")
+        for i in range(x.num_children):
+            idx = x.GetIndexOfChildWithName(f"[{i}]")
+            self.assertEqual(idx, i)
+            child = x.GetChildAtIndex(idx)
+            self.assertEqual(child.name, names[idx])
+        # Both the first invalid subscript and one beyond it must be rejected.
+        for i in (x.num_children, x.num_children + 1):
+            self.assertEqual(x.GetIndexOfChildWithName(f"[{i}]"), lldb.UINT32_MAX)
diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c
new file mode 100644
index 000000000000..ca0da120a7c0
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/main.c
@@ -0,0 +1,12 @@
+struct Thing {
+ int zero;
+ int one;
+};
+
+int main() {
+ struct Thing x;
+ x.zero = 1;
+ x.one = 2;
+ __builtin_printf("break here\n");
+ return 0;
+}
diff --git a/lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py
new file mode 100644
index 000000000000..0027f0ba0be6
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/synthetic_subscript/thing_formatter.py
@@ -0,0 +1,15 @@
+class ThingSynthetic:
+ def __init__(self, valobj, _) -> None:
+ self.valobj = valobj
+
+ def num_children(self):
+ return self.valobj.num_children
+
+ def get_child_at_index(self, idx):
+ return self.valobj.GetChildAtIndex(idx)
+
+ # Use default implementation of get_child_index.
+
+
+def __lldb_init_module(dbg, _):
+ dbg.HandleCommand(f"type synthetic add -l {__name__}.ThingSynthetic Thing")
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py b/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py
index cc8778bd51c7..3740ce8c451f 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Inputs/FormatterBytecode/formatter.py
@@ -9,6 +9,8 @@ import lldb
def __lldb_init_module(debugger, internal_dict):
+ debugger.HandleCommand("type category define --enabled llvm")
+
debugger.HandleCommand(
"type synthetic add -w llvm "
f"-l {__name__}.MyOptionalSynthProvider "
@@ -55,8 +57,8 @@ def stringify(bytecode: bytearray) -> str:
return s
-def evaluate(assembler: str, data: list):
- bytecode = compile(assembler)
+def evaluate(assembly: str, data: list):
+ bytecode = assemble(assembly)
trace = True
if trace:
print(
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test b/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test
index 746f0197a1b5..863964477287 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/bytecode.test
@@ -3,10 +3,10 @@
# RUN: %clang_host -std=c++17 -g %S/Inputs/FormatterBytecode/MyOptional.cpp -o %t.exe
# RUN: %lldb %t.exe -o "command script import %S/../../../../examples/python/formatter_bytecode.py" -o "command script import %S/Inputs/FormatterBytecode/formatter.py" -o "b -p here" -o "r" -o "v x" -o "v y" -o q | FileCheck %s --check-prefix=OPTIONAL
# OPTIONAL: (lldb) v x
-# OPTIONAL: (MyOptional<int>) x = {
+# OPTIONAL: (MyOptional<int>) x = None {
# OPTIONAL: hasVal = false
# OPTIONAL: }
# OPTIONAL: (lldb) v y
-# OPTIONAL: (MyOptional<int>) y = {
+# OPTIONAL: (MyOptional<int>) y = (int) value = 42 {
# OPTIONAL: Storage = (value = 42, hasVal = true)
# OPTIONAL: }
diff --git a/llvm/docs/AIToolPolicy.md b/llvm/docs/AIToolPolicy.md
index 2a3ff9345db7..c7576327b326 100644
--- a/llvm/docs/AIToolPolicy.md
+++ b/llvm/docs/AIToolPolicy.md
@@ -143,6 +143,18 @@ contributors are responsible for ensuring that such material does not appear in
their contributions. Contributions found to violate this policy will be removed
just like any other offending contribution.
+## Exceptions
+
+We have one exception to this policy for the Bazel-fixer bot. The project
+council approved [this RFC][bazel-rfc] proposing to use a combination of
+[dwyu][dwyu] and LLMs to maintain the Bazel build files.
+
+[bazel-rfc]: https://discourse.llvm.org/t/rfc-ai-assisted-bazel-fixer-bot/89178/93
+[dwyu]: https://github.com/hzeller/bant?tab=readme-ov-file#dwyu--depend-on-what-you-use
+
+Any future exception will be considered individually on its own merits as to
+whether it is useful to the project or extracts work from maintainers.
+
## Examples
Here are some examples of contributions that demonstrate how to apply
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h
index 88371b557fe7..6c506d7eb5b7 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h
@@ -88,6 +88,15 @@ public:
using ElementSet = DenseSet<ElementId>;
using ContainerElementsMap = DenseMap<ContainerId, ElementSet>;
+ class SuperNode;
+
+private:
+ using ElemToSuperNodeMap =
+ DenseMap<ContainerId, DenseMap<ElementId, SuperNode *>>;
+
+ using SuperNodeDepsMap = DenseMap<SuperNode *, DenseSet<SuperNode *>>;
+
+public:
class SuperNode {
friend class WaitingOnGraph;
friend class WaitingOnGraphTest;
@@ -103,14 +112,112 @@ public:
private:
ContainerElementsMap Defs;
ContainerElementsMap Deps;
- };
-private:
- using ElemToSuperNodeMap =
- DenseMap<ContainerId, DenseMap<ElementId, SuperNode *>>;
+ ElemToSuperNodeMap *RegisteredElemToSN = nullptr;
- using SuperNodeDepsMap = DenseMap<SuperNode *, DenseSet<SuperNode *>>;
+ /// Add a mapping from the Defs in this SuperNode to SN (which may or may
+ /// not be the same as this).
+ void mapDefsTo(ElemToSuperNodeMap &ElemToSN, SuperNode *SN,
+ bool AbandonOldMapping = false) {
+ assert(!Defs.empty() && "Empty defs!?");
+ for (auto &[Container, Elements] : Defs) {
+ assert(!Elements.empty() && "Empty elements for container?");
+ auto &ContainerElemToSN = ElemToSN[Container];
+ for (auto &Elem : Elements)
+ ContainerElemToSN[Elem] = SN;
+ }
+ assert((AbandonOldMapping || !SN->RegisteredElemToSN ||
+ SN->RegisteredElemToSN == &ElemToSN) &&
+ "SN defs split across maps");
+ SN->RegisteredElemToSN = &ElemToSN;
+ }
+
+ /// Add a mapping from the Defs in this SuperNode to this.
+ /// (Equivalent to `SN.mapDefsTo(ElemToSN, &SN);`)
+ void mapDefsToThis(ElemToSuperNodeMap &ElemToSN,
+ bool AbandonOldMapping = false) {
+ mapDefsTo(ElemToSN, this, AbandonOldMapping);
+ }
+
+ /// Remove a mapping from the Defs in this SuperNode from the registered
+ /// ElemToSuperNodeMap. The mapping must already exist.
+ void unmapDefsFromThis() {
+ assert(RegisteredElemToSN && "No registered ElemToSuperNodeMap");
+ for (auto &[Container, Elements] : Defs) {
+ auto I = RegisteredElemToSN->find(Container);
+ assert(I != RegisteredElemToSN->end() && "Container not in map");
+ auto &ContainerElemToSN = I->second;
+ for (auto &Elem : Elements) {
+ assert(ContainerElemToSN[Elem] == this && "Mapping not present");
+ ContainerElemToSN.erase(Elem);
+ }
+ if (ContainerElemToSN.empty())
+ RegisteredElemToSN->erase(I);
+ }
+ RegisteredElemToSN = nullptr;
+ }
+
+ /// For all Deps of this node that are defined by some node in ElemToSN,
+ /// remove the Dep from this node's Deps and add this SuperNode to the
+ /// dependants of the defining node (unless that node is this one).
+ ///
+ /// Returns true if SuperNodeDeps was changed.
+ bool hoistDeps(SuperNodeDepsMap &SuperNodeDeps,
+ ElemToSuperNodeMap &ElemToSN) {
+ bool Changed = false;
+
+ SmallVector<ContainerId> ContainersToRemove;
+ for (auto &[DepContainer, DepElems] : Deps) {
+ auto I = ElemToSN.find(DepContainer);
+ if (I == ElemToSN.end())
+ continue;
+ auto &ContainerElemToSN = I->second;
+
+ // ElemToSN includes SuperNodes that define elements in DepContainer.
+ // We need to iterate over ContainerElemToSN or DepElems: we pick the
+ // smaller to minimize the cost.
+ if (ContainerElemToSN.size() < DepElems.size()) {
+ for (auto &[DefElem, DefSN] : ContainerElemToSN) {
+ if (DepElems.erase(DefElem) && DefSN != this) {
+ Changed = true;
+ SuperNodeDeps[DefSN].insert(this);
+ }
+ }
+ } else {
+ SmallVector<ElementId> ElemsToRemove;
+ for (auto &DepElem : DepElems) {
+ auto J = ContainerElemToSN.find(DepElem);
+ if (J == ContainerElemToSN.end())
+ continue;
+ ElemsToRemove.push_back(DepElem);
+ SuperNode *DefSN = J->second;
+ if (DefSN != this) {
+ Changed = true;
+ SuperNodeDeps[DefSN].insert(this);
+ }
+ }
+
+ for (auto &DepElem : ElemsToRemove)
+ DepElems.erase(DepElem);
+ }
+
+ // If DepElems has become empty then add DepContainer to the list of
+ // containers to remove.
+ if (DepElems.empty())
+ ContainersToRemove.push_back(DepContainer);
+ }
+
+ for (auto &DepContainer : ContainersToRemove) {
+ assert(Deps.count(DepContainer) && "already removed?");
+ assert(Deps[DepContainer].empty() && "non empty?");
+ Deps.erase(DepContainer);
+ }
+ return Changed;
+ }
+ };
+
+private:
class Coalescer {
public:
std::unique_ptr<SuperNode> addOrCreateSuperNode(ContainerElementsMap Defs,
@@ -136,19 +243,17 @@ private:
}
void coalesce(std::vector<std::unique_ptr<SuperNode>> &SNs,
- ElemToSuperNodeMap &ElemToSN) {
+ ElemToSuperNodeMap &ElemToSN,
+ bool AbandonOldMapping = false) {
for (size_t I = 0; I != SNs.size();) {
auto &SN = SNs[I];
assert(!SNHashes.count(SN.get()) &&
"Elements of SNs should be new to the coalescer");
auto H = getHash(SN->Deps);
if (auto *CanonicalSN = findCanonicalSuperNode(H, SN->Deps)) {
- for (auto &[Container, Elems] : SN->Defs) {
+ SN->mapDefsTo(ElemToSN, CanonicalSN, AbandonOldMapping);
+ for (auto &[Container, Elems] : SN->Defs)
CanonicalSN->Defs[Container].insert(Elems.begin(), Elems.end());
- auto &ContainerElemToSN = ElemToSN[Container];
- for (auto &Elem : Elems)
- ContainerElemToSN[Elem] = CanonicalSN;
- }
std::swap(SN, SNs.back());
SNs.pop_back();
} else {
@@ -286,16 +391,11 @@ public:
static SimplifyResult simplify(std::vector<std::unique_ptr<SuperNode>> SNs) {
// Build ElemToSN map.
ElemToSuperNodeMap ElemToSN;
- for (auto &SN : SNs) {
- for (auto &[Container, Elements] : SN->Defs) {
- auto &ContainerElemToSN = ElemToSN[Container];
- for (auto &E : Elements)
- ContainerElemToSN[E] = SN.get();
- }
- }
+ for (auto &SN : SNs)
+ SN->mapDefsToThis(ElemToSN);
SuperNodeDepsMap SuperNodeDeps;
- hoistDeps(SuperNodeDeps, SNs, ElemToSN);
+ hoistDeps(SNs, SuperNodeDeps, ElemToSN);
propagateDeps(SuperNodeDeps);
// Pre-coalesce nodes.
@@ -324,25 +424,13 @@ public:
// First process any dependencies on nodes with external state.
auto FailedSNs = processExternalDeps(NewSNs, GetExternalState);
+ SuperNodeDepsMap SuperNodeDeps;
+
// Collect the PendingSNs whose dep sets are about to be modified.
std::vector<std::unique_ptr<SuperNode>> ModifiedPendingSNs;
for (size_t I = 0; I != PendingSNs.size();) {
auto &SN = PendingSNs[I];
- bool Remove = false;
- for (auto &[Container, Elems] : SN->Deps) {
- auto I = ElemToNewSN.find(Container);
- if (I == ElemToNewSN.end())
- continue;
- for (auto Elem : Elems) {
- if (I->second.contains(Elem)) {
- Remove = true;
- break;
- }
- }
- if (Remove)
- break;
- }
- if (Remove) {
+ if (SN->hoistDeps(SuperNodeDeps, ElemToNewSN)) {
ModifiedPendingSNs.push_back(std::move(SN));
std::swap(SN, PendingSNs.back());
PendingSNs.pop_back();
@@ -350,15 +438,11 @@ public:
++I;
}
- // Remove cycles from the graphs.
- SuperNodeDepsMap SuperNodeDeps;
- hoistDeps(SuperNodeDeps, ModifiedPendingSNs, ElemToNewSN);
-
- // If SN's deps are about to be modified then remove it from the coalescer.
+ // Remove SNs whose deps have been modified from the coalescer.
for (auto &SN : ModifiedPendingSNs)
CoalesceToPendingSNs.erase(SN.get());
- hoistDeps(SuperNodeDeps, NewSNs, ElemToPendingSN);
+ hoistDeps(NewSNs, SuperNodeDeps, ElemToPendingSN);
propagateDeps(SuperNodeDeps);
propagateFailures(FailedSNs, SuperNodeDeps);
@@ -372,7 +456,8 @@ public:
FailedSNs, nullptr);
CoalesceToPendingSNs.coalesce(ModifiedPendingSNs, ElemToPendingSN);
- CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN);
+ CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN,
+ /* AbandonOldMapping = */ true);
// Integrate remaining ModifiedPendingSNs and NewSNs into PendingSNs.
for (auto &SN : ModifiedPendingSNs)
@@ -380,11 +465,7 @@ public:
// Update ElemToPendingSN for the remaining elements.
for (auto &SN : NewSNs) {
- for (auto &[Container, Elems] : SN->Defs) {
- auto &Row = ElemToPendingSN[Container];
- for (auto &Elem : Elems)
- Row[Elem] = SN.get();
- }
+ SN->mapDefsToThis(ElemToPendingSN, /* AbandonOldMapping = */ true);
PendingSNs.push_back(std::move(SN));
}
@@ -514,57 +595,12 @@ public:
private:
// Replace individual dependencies with supernode dependencies.
- static void hoistDeps(SuperNodeDepsMap &SuperNodeDeps,
- std::vector<std::unique_ptr<SuperNode>> &SNs,
+ static void hoistDeps(std::vector<std::unique_ptr<SuperNode>> &SNs,
+ SuperNodeDepsMap &SuperNodeDeps,
ElemToSuperNodeMap &ElemToSN) {
// For all SNs...
- for (auto &SN : SNs) {
- SmallVector<ContainerId> ContainersToRemove;
- for (auto &[DepContainer, DepElems] : SN->Deps) {
-
- // Check ElemToSN to see if any other SuperNodes define elements in
- // DepContainer. If not then bail out early.
- auto I = ElemToSN.find(DepContainer);
- if (I == ElemToSN.end())
- continue;
- auto &ContainerElemToSN = I->second;
-
- // ElemToSN includes SuperNodes that define elements in DepContainer.
- // We need to iterate over ContainerElemToSN or DepElems: we pick the
- // smaller to minimize the cost.
- if (ContainerElemToSN.size() < DepElems.size()) {
- for (auto &[DefElem, DefSN] : ContainerElemToSN)
- if (DepElems.erase(DefElem) && DefSN != SN.get())
- SuperNodeDeps[DefSN].insert(SN.get());
- } else {
- SmallVector<ElementId> ElemsToRemove;
- for (auto &DepElem : DepElems) {
- auto J = ContainerElemToSN.find(DepElem);
- if (J == ContainerElemToSN.end())
- continue;
- ElemsToRemove.push_back(DepElem);
- SuperNode *DefSN = J->second;
- if (DefSN != SN.get())
- SuperNodeDeps[DefSN].insert(SN.get());
- }
-
- for (auto &DepElem : ElemsToRemove)
- DepElems.erase(DepElem);
- }
-
- // If DepElems has become empty then add DepContainer to the list of
- // containers to remove.
- if (DepElems.empty())
- ContainersToRemove.push_back(DepContainer);
- }
-
- // Remove any containers in SN->Deps that have become empty.
- for (auto &DepContainer : ContainersToRemove) {
- assert(SN->Deps.count(DepContainer) && "DepContainer already removed?");
- assert(SN->Deps[DepContainer].empty() && "DepContainer deps not empty");
- SN->Deps.erase(DepContainer);
- }
- }
+ for (auto &SN : SNs)
+ SN->hoistDeps(SuperNodeDeps, ElemToSN);
}
// Compute transitive closure of deps for each node.
@@ -692,13 +728,8 @@ private:
}
// Update ElemToSNs (if passed) to remove elements pointing at SN.
- for (auto *SN : ToRemoveFromElemToSNs) {
- for (auto &[Container, Elems] : SN->defs()) {
- auto &Row = (*ElemToSNs)[Container];
- for (auto &Elem : Elems)
- Row.erase(Elem);
- }
- }
+ for (auto *SN : ToRemoveFromElemToSNs)
+ SN->unmapDefsFromThis();
}
std::vector<std::unique_ptr<SuperNode>> PendingSNs;
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 6774a33556c0..e2b2feb92731 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -138,6 +138,18 @@ def int_dx_resource_samplecmplevelzero
llvm_float_ty, llvm_any_ty],
[IntrReadMem]>;
+def int_dx_resource_gather
+ : DefaultAttrsIntrinsic<[llvm_any_ty],
+ [llvm_any_ty, llvm_any_ty, llvm_any_ty,
+ llvm_i32_ty, llvm_any_ty],
+ [IntrReadMem]>;
+
+def int_dx_resource_gather_cmp
+ : DefaultAttrsIntrinsic<[llvm_any_ty],
+ [llvm_any_ty, llvm_any_ty, llvm_any_ty,
+ llvm_float_ty, llvm_i32_ty, llvm_any_ty],
+ [IntrReadMem]>;
+
// Cast between target extension handle types and dxil-style opaque handles
def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 065036cedc2a..8444675b847e 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -264,8 +264,6 @@ unsigned OptTable::internalFindNearest(
StringRef Option, std::string &NearestString, unsigned MinimumLength,
unsigned MaximumDistance,
std::function<bool(const Info &)> ExcludeOption) const {
- assert(!Option.empty());
-
// Consider each [option prefix + option name] pair as a candidate, finding
// the closest match.
unsigned BestDistance =
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index bc2dcdf9b591..bb0998064a26 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -305,8 +305,16 @@ def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>;
let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>;
+
+let True16Predicate = NotUseRealTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 0))),
+ (V_CVT_PKRTZ_F16_F32_e32 $src0, (IMPLICIT_DEF))>;
+
+let True16Predicate = UseRealTrue16Insts in
def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 0))),
- (V_CVT_PKRTZ_F16_F32_e32 $src0_modifiers, $src0)>;
+ (EXTRACT_SUBREG (V_CVT_PKRTZ_F16_F32_e64 $src0_modifiers, $src0, 0, (IMPLICIT_DEF)), lo16)>;
def : GCNPat <(v2f16 (build_vector (f16 (fptrunc_round f32:$src0, (i32 0))),
(f16 (fptrunc_round f32:$src1, (i32 0))))),
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 08f3ac44e157..5c4b1f3a4bdc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8859,14 +8859,27 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SRA:
if (Op.getSimpleValueType().isFixedLengthVector()) {
if (Subtarget.hasStdExtP()) {
- // We have patterns for scalar/immediate shift amount, so no lowering
- // needed.
- if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR)
- return Op;
-
// There's no vector-vector version of shift instruction in P extension
// so we need to unroll to scalar computation and pack them back.
- return DAG.UnrollVectorOp(Op.getNode());
+ if (Op.getOperand(1)->getOpcode() != ISD::SPLAT_VECTOR)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case ISD::SHL:
+ Opc = RISCVISD::PSHL;
+ break;
+ case ISD::SRL:
+ Opc = RISCVISD::PSRL;
+ break;
+ case ISD::SRA:
+ Opc = RISCVISD::PSRA;
+ break;
+ }
+ return DAG.getNode(Opc, SDLoc(Op), Op.getValueType(), Op.getOperand(0),
+ Op.getOperand(1).getOperand(0));
}
return lowerToScalableOp(Op, DAG);
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index eb276ef17da7..6917a8576a23 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1381,7 +1381,7 @@ class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
(Inst GPR:$rs1, ImmType:$imm)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: PatGprImm<OpNode, Inst, simm12_lo>;
-class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
+class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIBase Inst>
: PatGprImm<OpNode, Inst, uimmlog2xlen>;
/// Predicates
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index f82ff91eecdb..2e8e4c9fd816 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1658,6 +1658,13 @@ def riscv_mulhr : RVSDNode<"MULHR", SDTIntBinOp>;
def riscv_mulhru : RVSDNode<"MULHRU", SDTIntBinOp>;
def riscv_mulhrsu : RVSDNode<"MULHRSU", SDTIntBinOp>;
+def STD_RISCVPackedShift : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, XLenVT>]>;
+def riscv_pshl : RVSDNode<"PSHL", STD_RISCVPackedShift>;
+def riscv_psrl : RVSDNode<"PSRL", STD_RISCVPackedShift>;
+def riscv_psra : RVSDNode<"PSRA", STD_RISCVPackedShift>;
+
// Bitwise merge: res = (~op0 & op1) | (op0 & op2)
def SDT_RISCVMERGE : SDTypeProfile<1, 3, [SDTCisInt<0>,
SDTCisSameAs<0, 1>,
@@ -1766,23 +1773,23 @@ let Predicates = [HasStdExtP] in {
def: Pat<(XLenVecI16VT (riscv_mulhrsu GPR:$rs1, GPR:$rs2)), (PMULHRSU_H GPR:$rs1, GPR:$rs2)>;
// 8-bit logical shift left/right patterns
- def: Pat<(XLenVecI8VT (shl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))),
+ def: Pat<(XLenVecI8VT (riscv_pshl GPR:$rs1, uimm3:$shamt)),
(PSLLI_B GPR:$rs1, uimm3:$shamt)>;
- def: Pat<(XLenVecI8VT (srl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))),
+ def: Pat<(XLenVecI8VT (riscv_psrl GPR:$rs1, uimm3:$shamt)),
(PSRLI_B GPR:$rs1, uimm3:$shamt)>;
// 16-bit logical shift left/right patterns
- def: Pat<(XLenVecI16VT (shl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))),
+ def: Pat<(XLenVecI16VT (riscv_pshl GPR:$rs1, uimm4:$shamt)),
(PSLLI_H GPR:$rs1, uimm4:$shamt)>;
- def: Pat<(XLenVecI16VT (srl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))),
+ def: Pat<(XLenVecI16VT (riscv_psrl GPR:$rs1, uimm4:$shamt)),
(PSRLI_H GPR:$rs1, uimm4:$shamt)>;
// 8-bit arithmetic shift right patterns
- def: Pat<(XLenVecI8VT (sra GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))),
+ def: Pat<(XLenVecI8VT (riscv_psra GPR:$rs1, uimm3:$shamt)),
(PSRAI_B GPR:$rs1, uimm3:$shamt)>;
// 16-bit arithmetic shift right patterns
- def: Pat<(XLenVecI16VT (sra GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))),
+ def: Pat<(XLenVecI16VT (riscv_psra GPR:$rs1, uimm4:$shamt)),
(PSRAI_H GPR:$rs1, uimm4:$shamt)>;
// 16-bit signed saturation shift left patterns
@@ -1790,29 +1797,23 @@ let Predicates = [HasStdExtP] in {
(PSSLAI_H GPR:$rs1, uimm4:$shamt)>;
// 8-bit logical shift left/right
- def: Pat<(XLenVecI8VT (shl GPR:$rs1,
- (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(XLenVecI8VT (riscv_pshl GPR:$rs1, GPR:$rs2)),
(PSLL_BS GPR:$rs1, GPR:$rs2)>;
- def: Pat<(XLenVecI8VT (srl GPR:$rs1,
- (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(XLenVecI8VT (riscv_psrl GPR:$rs1, GPR:$rs2)),
(PSRL_BS GPR:$rs1, GPR:$rs2)>;
// 8-bit arithmetic shift left/right
- def: Pat<(XLenVecI8VT (sra GPR:$rs1,
- (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(XLenVecI8VT (riscv_psra GPR:$rs1, GPR:$rs2)),
(PSRA_BS GPR:$rs1, GPR:$rs2)>;
// 16-bit logical shift left/right
- def: Pat<(XLenVecI16VT (shl GPR:$rs1,
- (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(XLenVecI16VT (riscv_pshl GPR:$rs1, GPR:$rs2)),
(PSLL_HS GPR:$rs1, GPR:$rs2)>;
- def: Pat<(XLenVecI16VT (srl GPR:$rs1,
- (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(XLenVecI16VT (riscv_psrl GPR:$rs1, GPR:$rs2)),
(PSRL_HS GPR:$rs1, GPR:$rs2)>;
// 16-bit arithmetic shift left/right
- def: Pat<(XLenVecI16VT (sra GPR:$rs1,
- (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(XLenVecI16VT (riscv_psra GPR:$rs1, GPR:$rs2)),
(PSRA_HS GPR:$rs1, GPR:$rs2)>;
// 8-bit PLI SD node pattern
@@ -1865,6 +1866,7 @@ let Predicates = [HasStdExtP, IsRV32] in {
def : PatGprGpr<uaddsat, SADDU>;
def : PatGprGpr<usubsat, SSUBU>;
def : PatGprGpr<sshlsat, SSHA>;
+ def : PatGprUimmLog2XLen<sshlsat, SSLAI>;
// Narrowing shift patterns (NSRL/NSRA)
// Immediate shift amount patterns
@@ -1972,14 +1974,28 @@ let Predicates = [HasStdExtP, IsRV64] in {
def: Pat<(v2i32 (mul GPR:$rs1, GPR:$rs2)),
(PACK (MUL_W00 GPR:$rs1, GPR:$rs2), (MUL_W11 GPR:$rs1, GPR:$rs2))>;
+ // 32-bit logical shift left/right patterns
+ def: Pat<(v2i32 (riscv_pshl GPR:$rs1, uimm5:$shamt)),
+ (PSLLI_W GPR:$rs1, uimm5:$shamt)>;
+ def: Pat<(v2i32 (riscv_psrl GPR:$rs1, uimm5:$shamt)),
+ (PSRLI_W GPR:$rs1, uimm5:$shamt)>;
+
+ // 32-bit arithmetic shift right patterns
+ def: Pat<(v2i32 (riscv_psra GPR:$rs1, uimm5:$shamt)),
+ (PSRAI_W GPR:$rs1, uimm5:$shamt)>;
+
+ // 32-bit signed saturation shift left patterns
+ def: Pat<(v2i32 (sshlsat GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
+ (PSSLAI_W GPR:$rs1, uimm5:$shamt)>;
+
// 32-bit logical shift left/right
- def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(v2i32 (riscv_pshl GPR:$rs1, GPR:$rs2)),
(PSLL_WS GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i32 (srl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(v2i32 (riscv_psrl GPR:$rs1, GPR:$rs2)),
(PSRL_WS GPR:$rs1, GPR:$rs2)>;
// 32-bit arithmetic shift left/right
- def: Pat<(v2i32 (sra GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
+ def: Pat<(v2i32 (riscv_psra GPR:$rs1, GPR:$rs2)),
(PSRA_WS GPR:$rs1, GPR:$rs2)>;
// splat pattern
@@ -2006,20 +2022,6 @@ let Predicates = [HasStdExtP, IsRV64] in {
def: Pat<(v2i32 (smax GPR:$rs1, GPR:$rs2)), (PMAX_W GPR:$rs1, GPR:$rs2)>;
def: Pat<(v2i32 (umax GPR:$rs1, GPR:$rs2)), (PMAXU_W GPR:$rs1, GPR:$rs2)>;
- // 32-bit logical shift left/right patterns
- def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
- (PSLLI_W GPR:$rs1, uimm5:$shamt)>;
- def: Pat<(v2i32 (srl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
- (PSRLI_W GPR:$rs1, uimm5:$shamt)>;
-
- // 32-bit arithmetic shift left/right patterns
- def: Pat<(v2i32 (sra GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
- (PSRAI_W GPR:$rs1, uimm5:$shamt)>;
-
- // 32-bit signed saturation shift left patterns
- def: Pat<(v2i32 (sshlsat GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
- (PSSLAI_W GPR:$rs1, uimm5:$shamt)>;
-
// 32-bit vselect patterns
def: Pat<(v2i32 (vselect (v2i32 GPR:$mask), GPR:$true_v, GPR:$false_v)),
(MERGE GPR:$mask, GPR:$false_v, GPR:$true_v)>;
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 01fe45a013a6..63e897e076ed 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -545,15 +545,13 @@ public:
collectFusionCandidates(LV);
Changed |= fuseCandidates();
+ // All loops in the candidate sets have a common parent (or no parent).
+ // The next loop vector will correspond to a different parent, so it is
+ // safe to remove all the candidates currently in the sets.
FusionCandidates.clear();
}
- // Finished analyzing candidates at this level.
- // Descend to the next level and clear all of the candidates currently
- // collected. Note that it will not be possible to fuse any of the
- // existing candidates with new candidates because the new candidates will
- // be at a different nest level and thus not be control flow equivalent
- // with all of the candidates collected so far.
+ // Finished analyzing candidates at this level. Descend to the next level.
LLVM_DEBUG(dbgs() << "Descend one level!\n");
LDT.descend();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index a4a0e5d9a8b4..837a9b329514 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1817,10 +1817,12 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan,
bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
if (IsStrictPredicate) {
- return handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
- cast<VPWidenIntOrFpInductionRecipe>(IVOp),
- MinOrMaxResult, FindIVSelect, FindIVCmp,
- FindIVRdxResult);
+ if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
+ cast<VPWidenIntOrFpInductionRecipe>(IVOp),
+ MinOrMaxResult, FindIVSelect, FindIVCmp,
+ FindIVRdxResult))
+ return false;
+ continue;
}
// The reduction using MinOrMaxPhiR needs adjusting to compute the correct
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 2e33fcac9536..2a5a8f5e068d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -1,44 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=CHECK,GFX11-SDAG %s
+; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
; XUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; FIXME. gisel for fptrunc_round rtz
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_tonearest(float %a) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_tonearest:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_tonearest:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_tonearest:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearest")
ret half %res
}
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_upward(float %a) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
ret half %res
}
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_downward(float %a) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
ret half %res
}
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_towardzero(float %a) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_towardzero:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, s0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero")
ret half %res
}
@@ -49,6 +73,10 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_f32_to_v2f16_towardzero(float %a, f
; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_v2f16_towardzero:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_f32_to_v2f16_towardzero:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
@@ -63,10 +91,15 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_f32_to_v2f16_towardzero(float %a, f
}
define amdgpu_gs <2 x half> @v_fptrunc_round_poison_to_v2f16_towardzero(float %a) {
-; CHECK-LABEL: v_fptrunc_round_poison_to_v2f16_towardzero:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: v_fptrunc_round_poison_to_v2f16_towardzero:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_poison_to_v2f16_towardzero:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, s0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%lo = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero")
%tmp = insertelement <2 x half> poison, half %lo, i32 0
ret <2 x half> %tmp
@@ -80,6 +113,11 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_constant_to_v2f16_towardzero(float
; SDAG-NEXT: v_perm_b32 v0, s0, v0, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_constant_to_v2f16_towardzero:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, s0
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x3c00
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_constant_to_v2f16_towardzero:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
@@ -92,18 +130,32 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_constant_to_v2f16_towardzero(float
}
define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
-; CHECK-NEXT: v_add_f16_e32 v0, v0, v4
-; CHECK-NEXT: v_add_f16_e32 v0, v1, v0
-; CHECK-NEXT: global_store_short v[2:3], v0, off
-; CHECK-NEXT: s_endpgm
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; SDAG-NEXT: v_add_f16_e32 v0, v0, v4
+; SDAG-NEXT: v_add_f16_e32 v0, v1, v0
+; SDAG-NEXT: global_store_short v[2:3], v0, off
+; SDAG-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.h, v1
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v1
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v1.l, v0.l
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_endpgm
%res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
%res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
%res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
@@ -114,18 +166,32 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a
}
define amdgpu_gs void @v_fptrunc_round_f32_to_f16_downward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v0
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
-; CHECK-NEXT: v_add_f16_e32 v0, v4, v0
-; CHECK-NEXT: v_add_f16_e32 v0, v1, v0
-; CHECK-NEXT: global_store_short v[2:3], v0, off
-; CHECK-NEXT: s_endpgm
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v0
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; SDAG-NEXT: v_add_f16_e32 v0, v4, v0
+; SDAG-NEXT: v_add_f16_e32 v0, v1, v0
+; SDAG-NEXT: global_store_short v[2:3], v0, off
+; SDAG-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v4.l, v0
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.h, v1
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v4.l, v0.l
+; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_endpgm
%res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
%res2 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
%res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
@@ -136,17 +202,30 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_downward_multiple_calls(float
}
define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
-; CHECK-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
-; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v4, v1, v0
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
-; CHECK-NEXT: v_add_f16_e32 v0, v0, v4
-; CHECK-NEXT: v_add_f16_e32 v0, v1, v0
-; CHECK-NEXT: global_store_short v[2:3], v0, off
-; CHECK-NEXT: s_endpgm
+; SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
+; SDAG-NEXT: v_cvt_pkrtz_f16_f32_e32 v4, v1, v0
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
+; SDAG-NEXT: v_add_f16_e32 v0, v0, v4
+; SDAG-NEXT: v_add_f16_e32 v0, v1, v0
+; SDAG-NEXT: global_store_short v[2:3], v0, off
+; SDAG-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v4, v0, s0
+; GFX11-SDAG-NEXT: v_cvt_pk_rtz_f16_f32_e64 v5, v1, s0
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v1
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add_f16_e32 v0.h, v4.l, v5.l
+; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_endpgm
%res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero")
%res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.towardzero")
%res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
@@ -157,14 +236,23 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(floa
}
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
%bitcast = bitcast half %res to i16
%ret = zext i16 %bitcast to i32
@@ -172,14 +260,23 @@ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addr
}
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
%bitcast = bitcast half %res to i16
%ret = zext i16 %bitcast to i32
@@ -187,20 +284,34 @@ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr ad
}
define amdgpu_gs void @s_fptrunc_round_f32_to_f16_upward_multiple_calls(float inreg %a, float inreg %b, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v3
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; CHECK-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
-; CHECK-NEXT: v_add_f16_e32 v2, v2, v4
-; CHECK-NEXT: v_add_f16_e32 v2, v3, v2
-; CHECK-NEXT: global_store_short v[0:1], v2, off
-; CHECK-NEXT: s_endpgm
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v3
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; SDAG-NEXT: v_add_f16_e32 v2, v2, v4
+; SDAG-NEXT: v_add_f16_e32 v2, v3, v2
+; SDAG-NEXT: global_store_short v[0:1], v2, off
+; SDAG-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.l, s0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.h, s1
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, s1
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add_f16_e32 v2.l, v2.l, v2.h
+; GFX11-SDAG-NEXT: v_add_f16_e32 v2.l, v3.l, v2.l
+; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-SDAG-NEXT: s_endpgm
%res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
%res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
%res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
@@ -219,6 +330,14 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> %
; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -239,6 +358,14 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_downward(<2 x float>
; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_downward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
@@ -270,6 +397,22 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
; SDAG-NEXT: global_store_dword v[4:5], v0, off
; SDAG-NEXT: s_endpgm
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.h, v3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, v2
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; GFX11-SDAG-NEXT: v_pk_add_f16 v0, v1, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_add_f16 v0, v3, v0
+; GFX11-SDAG-NEXT: global_store_b32 v[4:5], v0, off
+; GFX11-SDAG-NEXT: s_endpgm
; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -298,18 +441,31 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
}
define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: v_readfirstlane_b32 s1, v1
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, s1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
%bitcast = bitcast <2 x half> %res to <2 x i16>
%ret = zext <2 x i16> %bitcast to <2 x i32>
@@ -317,18 +473,31 @@ define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> in
}
define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: v_readfirstlane_b32 s1, v1
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0.l, s0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, s1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11-SDAG-NEXT: ; return to shader part epilog
%res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward")
%bitcast = bitcast <2 x half> %res to <2 x i16>
%ret = zext <2 x i16> %bitcast to <2 x i32>
@@ -362,6 +531,22 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
; SDAG-NEXT: global_store_dword v[0:1], v2, off
; SDAG-NEXT: s_endpgm
;
+; GFX11-SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.h, s1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v2.l, s0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, s3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, s2
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v4.h, s3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v4.l, s2
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; GFX11-SDAG-NEXT: v_pk_add_f16 v2, v2, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_add_f16 v2, v4, v2
+; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-SDAG-NEXT: s_endpgm
; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -403,6 +588,15 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -425,6 +619,15 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float>
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, v3
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
@@ -449,6 +652,16 @@ define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %
; SDAG-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -475,6 +688,16 @@ define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_downward(<4 x float>
; SDAG-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v4f32_to_v4f16_downward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
@@ -507,6 +730,21 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_upward(<8 x float> %
; SDAG-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v8f32_to_v8f16_upward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.h, v7
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.h, v5
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.l, v4
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.l, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v7
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v8f32_to_v8f16_upward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -545,6 +783,21 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
; SDAG-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
; SDAG-NEXT: ; return to shader part epilog
;
+; GFX11-SDAG-LABEL: v_fptrunc_round_v8f32_to_v8f16_downward:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.h, v7
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.h, v5
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.h, v3
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.h, v1
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v1.l, v0
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v3.l, v2
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v5.l, v4
+; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v7.l, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v3
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v7
+; GFX11-SDAG-NEXT: ; return to shader part epilog
; GISEL-LABEL: v_fptrunc_round_v8f32_to_v8f16_downward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index 2369bbb4bfe4..cc00f427126b 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -584,6 +584,37 @@ define i32 @shlsat_i32(i32 %a, i32 %b) {
ret i32 %sshlsat
}
+define i8 @shlsati_i8(i8 %a) {
+; CHECK-LABEL: shlsati_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 24
+; CHECK-NEXT: sslai a0, a0, 5
+; CHECK-NEXT: srai a0, a0, 24
+; CHECK-NEXT: ret
+ %sshlsat = tail call i8 @llvm.sshl.sat.i8(i8 %a, i8 5)
+ ret i8 %sshlsat
+}
+
+define i16 @shlsati_i16(i16 %a) {
+; CHECK-LABEL: shlsati_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 16
+; CHECK-NEXT: sslai a0, a0, 10
+; CHECK-NEXT: srai a0, a0, 16
+; CHECK-NEXT: ret
+ %sshlsat = tail call i16 @llvm.sshl.sat.i16(i16 %a, i16 10)
+ ret i16 %sshlsat
+}
+
+define i32 @shlsati_i32(i32 %a) {
+; CHECK-LABEL: shlsati_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sslai a0, a0, 21
+; CHECK-NEXT: ret
+ %sshlsat = tail call i32 @llvm.sshl.sat.i32(i32 %a, i32 21)
+ ret i32 %sshlsat
+}
+
define i8 @sadd_i8(i8 %x, i8 %y) {
; CHECK-LABEL: sadd_i8:
; CHECK: # %bb.0:
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll b/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll
index ee7928963332..02e4bb4e5c2a 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-argmin-argmax.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
-; FIXME: Some loops in the file are currently mis-compiled.
; Tests for https://github.com/llvm/llvm-project/issues/184729.
define i64 @argmin_argmax(ptr %data, i32 %start_val) {
; CHECK-LABEL: define i64 @argmin_argmax(
@@ -15,11 +14,11 @@ define i64 @argmin_argmax(ptr %data, i32 %start_val) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
@@ -31,7 +30,7 @@ define i64 @argmin_argmax(ptr %data, i32 %start_val) {
; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI3]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw nsw <2 x i64> [[VEC_IND2]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw <2 x i64> [[VEC_IND2]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -45,9 +44,14 @@ define i64 @argmin_argmax(ptr %data, i32 %start_val) {
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL]]
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]])
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT8]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT9]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1)
+; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]])
+; CHECK-NEXT: [[DERIVED_IV_RESULT10:%.*]] = add i64 1, [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL]]
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT10]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -114,11 +118,11 @@ define i64 @argmin_argmin(ptr %data, i32 %start_val1, i32 %start_val2) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
@@ -130,7 +134,7 @@ define i64 @argmin_argmin(ptr %data, i32 %start_val1, i32 %start_val2) {
; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI5]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw <2 x i64> [[VEC_IND3]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw nsw <2 x i64> [[VEC_IND4]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw <2 x i64> [[VEC_IND4]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -144,9 +148,14 @@ define i64 @argmin_argmin(ptr %data, i32 %start_val1, i32 %start_val2) {
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL1]]
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]])
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT10]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1)
+; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]])
+; CHECK-NEXT: [[DERIVED_IV_RESULT12:%.*]] = add i64 1, [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL2]]
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT12]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -213,11 +222,11 @@ define i64 @argmax_argmax(ptr %data, i32 %start_val1, i32 %start_val2) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
@@ -229,7 +238,7 @@ define i64 @argmax_argmax(ptr %data, i32 %start_val1, i32 %start_val2) {
; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI5]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw <2 x i64> [[VEC_IND3]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw nsw <2 x i64> [[VEC_IND4]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw <2 x i64> [[VEC_IND4]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -243,9 +252,14 @@ define i64 @argmax_argmax(ptr %data, i32 %start_val1, i32 %start_val2) {
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL1]]
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]])
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT10]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1)
+; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]])
+; CHECK-NEXT: [[DERIVED_IV_RESULT12:%.*]] = add i64 1, [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL2]]
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT12]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -313,11 +327,11 @@ define i64 @argmin_signed_argmax_unsigned(ptr %data, i32 %start_val1, i32 %start
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
@@ -329,7 +343,7 @@ define i64 @argmin_signed_argmax_unsigned(ptr %data, i32 %start_val1, i32 %start
; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI5]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw <2 x i64> [[VEC_IND3]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw nsw <2 x i64> [[VEC_IND4]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT8]] = add nuw <2 x i64> [[VEC_IND4]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -343,9 +357,14 @@ define i64 @argmin_signed_argmax_unsigned(ptr %data, i32 %start_val1, i32 %start
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL1]]
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]])
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT10]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1)
+; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]])
+; CHECK-NEXT: [[DERIVED_IV_RESULT12:%.*]] = add i64 1, [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL2]]
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT12]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -410,11 +429,11 @@ define i64 @argmin_argmax_unsigned(ptr %data, i32 %start_val) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
@@ -426,7 +445,7 @@ define i64 @argmin_argmax_unsigned(ptr %data, i32 %start_val) {
; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI3]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw nsw <2 x i64> [[VEC_IND2]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT6]] = add nuw <2 x i64> [[VEC_IND2]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -440,9 +459,14 @@ define i64 @argmin_argmax_unsigned(ptr %data, i32 %start_val) {
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL]]
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]])
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT8]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP6]], [[BROADCAST_SPLAT9]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1)
+; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP16]])
+; CHECK-NEXT: [[DERIVED_IV_RESULT10:%.*]] = add i64 1, [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP14]], [[START_VAL]]
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 0, i64 [[DERIVED_IV_RESULT10]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -498,57 +522,14 @@ exit:
define i64 @argmin_second_not_argmax(ptr %data, i32 %start_val) {
; CHECK-LABEL: define i64 @argmin_second_not_argmax(
; CHECK-SAME: ptr [[DATA:%.*]], i32 [[START_VAL:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[START_VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP2]] = select <2 x i1> [[TMP1]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI2]]
-; CHECK-NEXT: [[TMP3]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI]])
-; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> splat (i32 10), [[VEC_PHI3]]
-; CHECK-NEXT: [[TMP5]] = select <2 x i1> [[TMP4]], <2 x i64> [[VEC_IND1]], <2 x i64> [[VEC_PHI4]]
-; CHECK-NEXT: [[TMP6]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[VEC_PHI3]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[TMP3]])
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT6]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i32> [[TMP3]], [[BROADCAST_SPLAT7]]
-; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1)
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[TMP10]])
-; CHECK-NEXT: [[DERIVED_IV_RESULT:%.*]] = add i64 1, [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], [[START_VAL]]
-; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 0, i64 [[DERIVED_IV_RESULT]]
-; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[TMP5]])
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
-; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MINVAL:%.*]] = phi i32 [ [[TMP8]], %[[SCALAR_PH]] ], [ [[NEW_MINVAL:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MINPOS:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[NEW_MINPOS:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MAXVAL:%.*]] = phi i32 [ [[TMP14]], %[[SCALAR_PH]] ], [ [[NEW_MAXVAL:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MAXPOS:%.*]] = phi i64 [ [[TMP17]], %[[SCALAR_PH]] ], [ [[NEW_MAXPOS:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MINVAL:%.*]] = phi i32 [ [[START_VAL]], %[[SCALAR_PH]] ], [ [[NEW_MINVAL:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MINPOS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[NEW_MINPOS:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAXVAL:%.*]] = phi i32 [ [[START_VAL]], %[[SCALAR_PH]] ], [ [[NEW_MAXVAL:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAXPOS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[NEW_MAXPOS:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[CMP_MIN:%.*]] = icmp slt i32 [[VAL]], [[MINVAL]]
@@ -559,7 +540,7 @@ define i64 @argmin_second_not_argmax(ptr %data, i32 %start_val) {
; CHECK-NEXT: [[NEW_MAXVAL]] = call i32 @llvm.smax.i32(i32 [[VAL]], i32 [[MAXVAL]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[NEW_MINPOS_LCSSA:%.*]] = phi i64 [ [[NEW_MINPOS]], %[[LOOP]] ]
; CHECK-NEXT: [[NEW_MAXPOS_LCSSA:%.*]] = phi i64 [ [[NEW_MAXPOS]], %[[LOOP]] ]
diff --git a/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn
index eded7696e9e8..2e2415cd441e 100644
--- a/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/unittests/BUILD.gn
@@ -1,8 +1,8 @@
group("unittests") {
deps = [
"Core:CoreTests",
- "Profile:ProfileTests",
"Passes:PassTests",
+ "Profile:ProfileTests",
]
testonly = true
}
diff --git a/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn
index 3f022ed7f748..f3bd07f61416 100644
--- a/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Options/BUILD.gn
@@ -10,7 +10,7 @@ static_library("Options") {
]
public_deps = [
# public_dep because public header Options.h includes generated Options.inc.
- "//clang/include/clang/Options:Options",
+ "//clang/include/clang/Options",
]
sources = [
"DriverOptions.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn
index b6818a2a5d02..f6e4632cc7b5 100644
--- a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn
@@ -22,9 +22,9 @@ static_library("Sema") {
"//clang/include/clang/Basic:riscv_andes_vector_builtin_sema",
"//clang/include/clang/Basic:riscv_sifive_vector_builtin_sema",
"//clang/include/clang/Basic:riscv_vector_builtin_sema",
+ "//clang/include/clang/Sema:AttrIsTypeDependent",
"//clang/include/clang/Sema:AttrParsedAttrImpl",
"//clang/include/clang/Sema:AttrParsedAttrKinds",
- "//clang/include/clang/Sema:AttrIsTypeDependent",
"//clang/include/clang/Sema:AttrSpellingListIndex",
"//clang/include/clang/Sema:AttrTemplateInstantiate",
"//clang/lib/APINotes",
diff --git a/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn
index 653062d108ce..e5f491ba8fa4 100644
--- a/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Tooling/BUILD.gn
@@ -2,7 +2,7 @@ static_library("Tooling") {
output_name = "clangTooling"
configs += [ "//llvm/utils/gn/build:clang_code" ]
deps = [
- "//clang/include/clang/Options:Options",
+ "//clang/include/clang/Options",
"//clang/lib/AST",
"//clang/lib/ASTMatchers",
"//clang/lib/Basic",
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
index 483d56be802a..ab8ea1ffb56b 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
@@ -7,10 +7,10 @@ if (current_toolchain == host_toolchain) {
}
} else {
asan_sources = [
- "asan_aix.cpp",
"asan_activation.cpp",
"asan_activation.h",
"asan_activation_flags.inc",
+ "asan_aix.cpp",
"asan_allocator.cpp",
"asan_allocator.h",
"asan_debugging.cpp",
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
index 1bd812560b2c..5815de177bb7 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
@@ -94,13 +94,13 @@ source_set("_unused") {
# Thumb1
"arm/addsf3.S",
"arm/comparesf2.S",
+ "arm/divsf3.S",
"arm/divsi3.S",
- "arm/udivsi3.S",
"arm/fnan2.c",
"arm/fnorm2.c",
"arm/funder.c",
"arm/mulsf3.S",
- "arm/divsf3.S",
"arm/thumb1/mulsf3.S",
+ "arm/udivsi3.S",
]
}
diff --git a/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn
index b501d7df1468..297ccc381a4e 100644
--- a/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/test/asan/BUILD.gn
@@ -26,7 +26,7 @@ write_cmake_config("lit_site_cfg") {
values += [
"ASAN_TEST_APPLE_TARGET_IS_HOST_PYBOOL=1",
"ASAN_TEST_DYNAMIC=1",
- ]
+ ]
} else {
values += [
"ASAN_TEST_APPLE_TARGET_IS_HOST_PYBOOL=0",
diff --git a/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn
index 838846e31d92..41dfc1fdb0ba 100644
--- a/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn
@@ -76,8 +76,8 @@ static_library("Host") {
"windows/MainLoopWindows.cpp",
"windows/PipeWindows.cpp",
"windows/ProcessLauncherWindows.cpp",
- "windows/PseudoConsole.cpp",
"windows/ProcessRunLock.cpp",
+ "windows/PseudoConsole.cpp",
]
} else {
sources += [
diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn
index e82fe2d11b75..3a151bbe0fe2 100644
--- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn
@@ -142,7 +142,7 @@ write_lit_cfg("lit_shell_site_cfg") {
"LLDB_TOOL_LLDB_SERVER_BUILD=1",
"LLDB_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"),
"LLDB_USE_SYSTEM_DEBUGSERVER=1", # XXX port //lldb/tools/debugserver (?)
- "LLVM_ENABLE_DIA_SDK=0", # FIXME: option? just enable on windows?
+ "LLVM_ENABLE_DIA_SDK=0", # FIXME: option? just enable on windows?
"LLVM_HOST_TRIPLE=$llvm_current_triple",
"LLVM_USE_SANITIZER=",
"Python3_EXECUTABLE=$python_path",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 9fe4f3e4149b..4c237ddfed39 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -4,6 +4,7 @@ static_library("Analysis") {
# Must be a public_dep because Analysis's headers include
# TargetLibraryInfo.inc.
"//llvm/include/llvm/Analysis:TargetLibraryInfo",
+
# Must be a public_dep because Analysis's headers include llvm-config.h.
"//llvm/include/llvm/Config:llvm-config",
]
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn
index 3f6de22922a7..acd3fbd176a8 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn
@@ -49,8 +49,8 @@ static_library("LLVMBPFCodeGen") {
":BPFGenFastISel",
":BPFGenGlobalISel",
":BPFGenMCPseudoLowering",
- ":BPFGenSDNodeInfo",
":BPFGenRegisterBank",
+ ":BPFGenSDNodeInfo",
"MCTargetDesc",
"TargetInfo",
"//llvm/include/llvm/Config:llvm-config",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
index 186d2ef96c19..35b069e823c2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
@@ -28,8 +28,8 @@ static_library("Utils") {
"CountVisits.cpp",
"CtorUtils.cpp",
"DXILUpgrade.cpp",
- "Debugify.cpp",
"DebugSSAUpdater.cpp",
+ "Debugify.cpp",
"DeclareRuntimeLibcalls.cpp",
"DemoteRegToStack.cpp",
"EntryExitInstrumenter.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn
index 2363dad4bd34..454b2ed1f1d1 100644
--- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn
@@ -121,5 +121,6 @@ write_cmake_config("llvm-lit") {
"Python3_EXECUTABLE=/usr/bin/env $python_path",
"BUILD_MODE=.",
"LLVM_LIT_CONFIG_MAP=" + config_map,
+ "LLVM_WINDOWS_PREFER_FORWARD_SLASH=",
]
}
diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
index ab8557406968..27d3d0d9b6e7 100644
--- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
+++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
@@ -168,8 +168,7 @@ class Arith_IntBinaryOpWithOverflowFlags<string mnemonic, list<Trait> traits = [
class Arith_IntBinaryOpWithExactFlag<string mnemonic, list<Trait> traits = []> :
Arith_BinaryOp<mnemonic, traits #
- [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
- DeclareOpInterfaceMethods<ArithExactFlagInterface>]>,
+ [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>]>,
Arguments<(ins Arith_SignlessIntegerOrIndexLike:$lhs,
Arith_SignlessIntegerOrIndexLike:$rhs,
UnitAttr:$isExact)>,
@@ -1588,34 +1587,15 @@ def IndexCastTypeConstraint : TypeConstraint<Or<[
def Arith_IndexCastOp
: Arith_CastOp<"index_cast", IndexCastTypeConstraint, IndexCastTypeConstraint,
- [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
- DeclareOpInterfaceMethods<ArithExactFlagInterface>]> {
+ [DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>]> {
let summary = "cast between index and integer types";
let description = [{
Casts between scalar or vector integers and corresponding 'index' scalar or
vectors. Index is an integer of platform-specific bit width. If casting to
a wider integer, the value is sign-extended. If casting to a narrower
integer, the value is truncated.
-
- If the `exact` attribute is present, it is assumed that the operand
- contains a value that fits in the destination's representation, therefore
- the cast does not lose any information. When this assumption is violated,
- the result is poison.
-
- Example:
-
- ```mlir
- %0 = arith.index_cast %a : index to i64
- %1 = arith.index_cast %a exact : index to i64
- %2 = arith.index_cast %b exact : i32 to index
- ```
}];
- let arguments = (ins IndexCastTypeConstraint:$in, UnitAttr:$isExact);
- let results = (outs IndexCastTypeConstraint:$out);
- let assemblyFormat = [{
- $in (`exact` $isExact^)? attr-dict `:` type($in) `to` type($out)
- }];
let hasFolder = 1;
let hasCanonicalizer = 1;
}
@@ -1627,8 +1607,7 @@ def Arith_IndexCastOp
def Arith_IndexCastUIOp
: Arith_CastOp<"index_castui", IndexCastTypeConstraint, IndexCastTypeConstraint,
[DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
- DeclareOpInterfaceMethods<ArithNonNegFlagInterface>,
- DeclareOpInterfaceMethods<ArithExactFlagInterface>]> {
+ DeclareOpInterfaceMethods<ArithNonNegFlagInterface>]> {
let summary = "unsigned cast between index and integer types";
let description = [{
Casts between scalar or vector integers and corresponding 'index' scalar or
@@ -1641,27 +1620,19 @@ def Arith_IndexCastUIOp
is equivalent to sign extension. When this assumption is violated,
the result is poison.
- If the `exact` attribute is present, it is assumed that the operand
- contains a value that fits in the destination's representation, therefore
- the cast does not lose any information. When this assumption is violated,
- the result is poison.
-
Example:
```mlir
%0 = arith.index_castui %a : i32 to index
%1 = arith.index_castui %a nneg : i32 to index
%2 = arith.index_castui %b nneg : index to i64
- %3 = arith.index_castui %a nneg exact : i64 to index
```
}];
- let arguments = (ins IndexCastTypeConstraint:$in, UnitAttr:$nonNeg,
- UnitAttr:$isExact);
+ let arguments = (ins IndexCastTypeConstraint:$in, UnitAttr:$nonNeg);
let results = (outs IndexCastTypeConstraint:$out);
let assemblyFormat = [{
- $in oilist(`exact` $isExact | `nneg` $nonNeg) attr-dict
- `:` type($in) `to` type($out)
+ $in (`nneg` $nonNeg^)? attr-dict `:` type($in) `to` type($out)
}];
let hasFolder = 1;
let hasCanonicalizer = 1;
diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td
index e8287ac2d6bc..d1b8e250cdb5 100644
--- a/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td
@@ -153,53 +153,6 @@ def ArithNonNegFlagInterface : OpInterface<"ArithNonNegFlagInterface"> {
];
}
-def ArithExactFlagInterface : OpInterface<"ArithExactFlagInterface"> {
- let description = [{
- Access to op exact flag.
- }];
-
- let cppNamespace = "::mlir::arith";
-
- let methods = [
- InterfaceMethod<
- /*desc=*/ "Returns whether the operation has the exact flag set",
- /*returnType=*/ "bool",
- /*methodName=*/ "getExact",
- /*args=*/ (ins),
- /*methodBody=*/ [{}],
- /*defaultImpl=*/ [{
- auto op = cast<ConcreteOp>(this->getOperation());
- return op.getIsExactAttr() != nullptr;
- }]
- >,
- InterfaceMethod<
- /*desc=*/ "Set the exact flag for the operation",
- /*returnType=*/ "void",
- /*methodName=*/ "setExact",
- /*args=*/ (ins "bool":$isExact),
- /*methodBody=*/ [{}],
- /*defaultImpl=*/ [{
- auto op = cast<ConcreteOp>(this->getOperation());
- if (isExact)
- op.setIsExactAttr(UnitAttr::get(op->getContext()));
- else
- op.removeIsExactAttr();
- }]
- >,
- StaticInterfaceMethod<
- /*desc=*/ [{Returns the name of the exact flag attribute for
- the operation}],
- /*returnType=*/ "StringRef",
- /*methodName=*/ "getExactFlagAttrName",
- /*args=*/ (ins),
- /*methodBody=*/ [{}],
- /*defaultImpl=*/ [{
- return "isExact";
- }]
- >
- ];
-}
-
def ArithRoundingModeInterface : OpInterface<"ArithRoundingModeInterface"> {
let description = [{
Access to op rounding mode.
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
index f179cfd752c6..ebb0e6132fee 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td
@@ -210,4 +210,157 @@ def OpenACC_FirstprivateMapInitialOp
let extraClassDeclaration = extraClassDeclarationBase;
}
+//===----------------------------------------------------------------------===//
+// acc.par_width
+//===----------------------------------------------------------------------===//
+
+def OpenACC_ParWidthOp
+ : OpenACC_Op<"par_width", [NoMemoryEffect, AlwaysSpeculatable]> {
+ let summary = "Specify parallel width for a GPU dimension";
+ let description = [{
+ The `acc.par_width` operation specifies the parallel width for a
+ given GPU parallel dimension. It is used as an input to
+ `acc.compute_region` to define the launch configuration.
+
+ The optional `launchArg` operand provides a known width value. When
+ absent, the width is unknown and must be determined later (either at
+ compile time by analysis or at runtime).
+
+ Examples:
+
+ ```mlir
+ // Known width from SSA value
+ %w1 = acc.par_width %vector_len {par_dim = #acc.par_dim<thread_x>}
+
+ // Unknown width (to be computed later)
+ %w2 = acc.par_width {par_dim = #acc.par_dim<block_x>}
+ ```
+ }];
+ let arguments = (ins Optional<Index>:$launchArg,
+ OpenACC_GPUParallelDimAttr:$par_dim);
+ let results = (outs OpenACC_ParWidthType:$output);
+ let assemblyFormat = [{
+ ($launchArg^)? attr-dict
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// acc.compute_region
+//===----------------------------------------------------------------------===//
+
+// Local type constraint for gpu::AsyncTokenType.
+def OpenACC_GPUAsyncTokenType : Type<
+ CPred<"::llvm::isa<::mlir::gpu::AsyncTokenType>($_self)">,
+ "GPU async token type">;
+
+def OpenACC_ComputeRegionOp
+ : OpenACC_Op<"compute_region",
+ [OffloadRegionOpInterface, AffineScope,
+ RecursiveMemoryEffects,
+ SingleBlockImplicitTerminator<"YieldOp">,
+ IsolatedFromAbove, AttrSizedOperandSegments]> {
+ let summary = "Compute region for GPU execution";
+ let description = [{
+ The `acc.compute_region` operation wraps a region of code that will be
+ compiled and executed on a GPU. It is typically produced by lowering
+ OpenACC compute constructs (`acc.parallel`, `acc.kernels`, `acc.serial`)
+ but can also be targeted directly by other frontends or lowered from
+ other constructs that benefit from the automatic parallelization and data
+ mapping facilities that the `acc` dialect provides. It serves as the
+ bridge between the high-level representation and the `gpu.launch`
+ operation.
+
+ The operation is `IsolatedFromAbove`: all values used inside the
+ region must be explicitly captured. Values are captured in two ways:
+
+ - Launch arguments (`launch`): Results of operations that define
+ the parallel launch configuration. These are `!acc.par_width`-typed
+ and become block arguments representing the parallel width for each
+ dimension.
+
+ - Input arguments (`ins`): Arbitrary values captured from outside
+ the region (data pointers, scalars, etc.). These become block
+ arguments with their original types.
+
+ The `origin` attribute records which construct produced this compute
+ region (e.g., `"acc.parallel"`, `"acc.kernels"`). This is intended to
+ be solely informational.
+
+ Example:
+
+ ```mlir
+ %w0 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>}
+ %w1 = acc.par_width %c8 {par_dim = #acc.par_dim<block_x>}
+ acc.compute_region launch(%arg0 = %w0, %arg1 = %w1)
+ ins(%arg2 = %data) : (memref<1024xf32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c1024 = arith.constant 1024 : index
+ scf.parallel (%iv) = (%c0) to (%c1024) step (%c1) {
+ %v = memref.load %arg2[%iv] : memref<1024xf32>
+ scf.reduce
+ } {acc.par_dims = #acc<par_dims[thread_x]>}
+ acc.yield
+ } {origin = "acc.parallel"}
+ ```
+ }];
+
+ let arguments = (ins Variadic<OpenACC_ParWidthType>:$launchArgs,
+ Variadic<AnyType>:$inputArgs,
+ Optional<OpenACC_GPUAsyncTokenType>:$stream,
+ StrAttr:$origin,
+ OptionalAttr<FlatSymbolRefAttr>:$kernel_func_name,
+ OptionalAttr<FlatSymbolRefAttr>:$kernel_module_name);
+
+ let results = (outs Variadic<AnyType>:$results);
+
+ let regions = (region AnyRegion:$region);
+
+ let extraClassDeclaration = [{
+ /// Look up the par_width op for the given dimension among launch args.
+ std::optional<mlir::Value> getLaunchArg(
+ ::mlir::acc::GPUParallelDimAttr parDim);
+
+ /// Get the known (non-empty) launch value for a dimension.
+ std::optional<mlir::Value> getKnownLaunchArg(
+ ::mlir::acc::GPUParallelDimAttr parDim);
+
+ /// Get the known constant launch value for a dimension.
+ std::optional<uint64_t> getKnownConstantLaunchArg(
+ ::mlir::acc::GPUParallelDimAttr parDim);
+
+ /// Add a new input argument, appending to both the operand list and
+ /// the region block arguments. Returns the new block argument.
+ ::mlir::BlockArgument appendInputArg(::mlir::Value);
+
+ /// Check whether all parallel dimensions have width 1.
+ bool isEffectivelySerial();
+
+ /// Get the block argument representing the width for a given dimension.
+ ::mlir::BlockArgument parDimToWidth(
+ ::mlir::acc::GPUParallelDimAttr parDim);
+
+ /// Get the block argument for a specific gpu::Processor.
+ ::mlir::BlockArgument gpuParWidth(::mlir::gpu::Processor);
+
+ /// Collect all GPU parallel dimensions present in the launch config.
+ llvm::SmallVector<::mlir::acc::GPUParallelDimAttr> getLaunchParDims();
+
+ /// Get the body block of the compute region.
+ ::mlir::Block *getBody() { return &getRegion().front(); }
+
+ /// Get the terminator of the compute region.
+ ::mlir::Operation *getTerminator() {
+ return &getRegion().back().back();
+ }
+
+ /// Map a block argument back to its corresponding operand
+ /// ($launchArgs or $inputArgs).
+ ::mlir::Value getOperand(::mlir::BlockArgument blockArg);
+ }];
+
+ let hasVerifier = 1;
+ let hasCustomAssemblyFormat = 1;
+}
+
#endif // OPENACC_CG_OPS
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 2bb1654cb636..33d3b84b32b9 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2845,7 +2845,7 @@ def OpenACC_LoopOp
def OpenACC_YieldOp : OpenACC_Op<"yield", [Pure, ReturnLike, Terminator,
ParentOneOf<["FirstprivateRecipeOp, LoopOp, ParallelOp, PrivateRecipeOp,"
"ReductionRecipeOp, ReductionInitOp, ReductionCombineRegionOp,"
- "SerialOp, AtomicUpdateOp"]>]> {
+ "SerialOp, AtomicUpdateOp, ComputeRegionOp"]>]> {
let summary = "Acc yield and termination operation";
let description = [{
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
index 117272693d62..bba385e69c0f 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td
@@ -33,4 +33,12 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> {
}];
}
+def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> {
+ let summary = "parallel width token type";
+ let description = [{
+ Represents a type that is consumed by a compute region in order to
+ capture its parallelism dimension arguments.
+ }];
+}
+
#endif // OPENACC_OPS_TYPES
diff --git a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h
index 852407292979..daf9b0df4191 100644
--- a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h
@@ -81,7 +81,7 @@ int64_t linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis);
///
/// Let `li = linearIndex`, assuming `strides` are `[s0, .. sn]`, return the
/// vector of int64_t
-/// `[li % s0, (li / s0) % s1, ..., (li / s0 / .. / sn-1) % sn]`
+/// `[li / s0, (li % s0) / s1, ..., (li % s0 % .. % sn-1) / sn]`
SmallVector<int64_t> delinearize(int64_t linearIndex,
ArrayRef<int64_t> strides);
@@ -181,7 +181,7 @@ AffineExpr linearize(MLIRContext *ctx, ArrayRef<AffineExpr> offsets,
///
/// Let `li = linearIndex`, assuming `strides` are `[s0, .. sn]`, return the
/// vector of AffineExpr
-/// `[li % s0, (li / s0) % s1, ..., (li / s0 / .. / sn-1) % sn]`
+/// `[li / s0, (li % s0) / s1, ..., (li % s0 % .. % sn-1) / sn]`
///
/// It is the caller's responsibility to pass proper AffineExpr kind that result
/// in valid AffineExpr (i.e. cannot multiply 2 AffineDimExpr or divide by an
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 6f667f480167..a98073f3c5cf 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -254,6 +254,10 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
"xegpu::DistributeLayoutAttr",
"collapseDims",
(ins "SmallVector<int64_t>": $dimGroup)>,
+ InterfaceMethod<[{Derive a new layout by transposing it using `permutation`.}],
+ "xegpu::DistributeLayoutAttr",
+ "transposeDims",
+ (ins "ArrayRef<int64_t>": $permutation)>,
InterfaceMethod<[{Generates instructions to compute multidimensional coordinates for dist units
assigned to a level identified by linearId. The shape parameter
represents the higher-level problem size. Each level may access
@@ -261,56 +265,17 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
"FailureOr<SmallVector<SmallVector<Value>>>",
"computeDistributedCoords",
(ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef<int64_t>":$shape)>,
- InterfaceMethod</*desc=*/[{Check if this layout can be achieved by applying a transpose
- to some other layout according to given permutation of (0...n-1).}],
- /*retTy=*/"bool",
- /*methodName=*/"isTransposeOf",
- /*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other, "ArrayRef<int64_t>": $perm),
- /*methodBody=*/[{
- if (!other)
- return false;
- if ($_self.getRank() != other.getRank() || perm.size() != static_cast<size_t>($_self.getRank()))
- return false;
- // Check if the permutation is valid
- if (!isPermutationVector(perm))
- return false;
- auto checkTranspose = [](ArrayRef<int64_t> dst, ArrayRef<int64_t> src, ArrayRef<int64_t> perm) {
- // If both `dst` and `src` are empty, conservatively return true
- // here because some layout fields can be empty.
- if (dst.empty() && src.empty())
- return true;
- for (const auto &ta : llvm::enumerate(perm)) {
- if (src[ta.index()] != dst[ta.value()])
- return false;
- }
- return true;
- };
- // Check sgLayout
- if (!checkTranspose($_self.getEffectiveSgLayoutAsInt(), other.getEffectiveSgLayoutAsInt(), perm))
- return false;
- // Check sgData
- if (!checkTranspose($_self.getEffectiveSgDataAsInt(), other.getEffectiveSgDataAsInt(), perm))
- return false;
- // Check instData
- if (!checkTranspose($_self.getEffectiveInstDataAsInt(), other.getEffectiveInstDataAsInt(), perm))
- return false;
- // Check laneLayout
- if (!checkTranspose($_self.getEffectiveLaneLayoutAsInt(), other.getEffectiveLaneLayoutAsInt(), perm))
- return false;
- // Check laneData
- if (!checkTranspose($_self.getEffectiveLaneDataAsInt(), other.getEffectiveLaneDataAsInt(), perm))
- return false;
- // Check order
- if (!checkTranspose($_self.getEffectiveOrderAsInt(), other.getEffectiveOrderAsInt(), perm))
- return false;
-
- return true;
- }]>,
InterfaceMethod</*desc=*/[{Check if this layout is a slice of another layout.}],
/*retTy=*/"bool",
/*methodName=*/"isSliceOf",
/*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other)>,
-
+ InterfaceMethod</*desc=*/[{Check if this layout is a transpose of
+ the other layout according to given permutation of (0...n-1).}],
+ /*retTy=*/"bool",
+ /*methodName=*/"isTransposeOf",
+ /*args=*/(ins "const xegpu::DistributeLayoutAttr&": $other,
+ "ArrayRef<int64_t>": $perm,
+ "xegpu::LayoutKind": $kind)>,
InterfaceMethod</*desc=*/[{Check if this layout is compatible with another layout
at a specific level of the layout hierarchy. Unlike isEqualTo,
this compares only the effective (non-sliced) fields at the
@@ -498,8 +463,11 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
// avoid every field of the attribute is nullptr, which may lead to segment fault
if (!getInstData() && !getLaneLayout())
return nullptr;
+ // Only preserve order if lane_layout remains, since order requires
+ // sg_layout or lane_layout to be present.
+ auto order = getLaneLayout() ? getOrder() : nullptr;
return LayoutAttr::get(getContext(), nullptr, nullptr, getInstData(),
- getLaneLayout(), getLaneData(), getOrder());
+ getLaneLayout(), getLaneData(), order);
}
LayoutAttr dropInstData() const{
@@ -567,6 +535,9 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
// that are collapsed into a single dimension in the derived layout.
DistributeLayoutAttr collapseDims(SmallVector<int64_t> dimGroup);
+ // Derive a new layout by transposing the layout using `permutation`.
+ DistributeLayoutAttr transposeDims(ArrayRef<int64_t> permutation);
+
/// Delinearizes a linear ID into its multidimensional indices
/// based on the effective level of the layout.
FailureOr<SmallVector<Value>>
@@ -584,6 +555,9 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
/// Check if this layout is equal to another layout.
bool isEqualTo(const xegpu::DistributeLayoutAttr &other);
+
+ /// Check if this layout is a transpose of another layout.
+ bool isTransposeOf(const xegpu::DistributeLayoutAttr &other, ArrayRef<int64_t> perm, const xegpu::LayoutKind kind);
}];
let assemblyFormat = "`<` struct(params) `>`";
@@ -767,6 +741,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
// that are collapsed into a single dimension in the derived layout.
DistributeLayoutAttr collapseDims(SmallVector<int64_t> dimGroup);
+ // Derive a new layout by transposing the layout using `permutation`.
+ DistributeLayoutAttr transposeDims(ArrayRef<int64_t> permutation);
+
/// flatten a nested SliceAttr, e.g., for 2-level nested SliceAttr
/// #xegpu.slice<#xegpu.slice<#xegpu.layout<sg_layout = [4, 8, 12]>, dims = [0]>, dims = [0]>
/// it will coalese two slice operations and return a simplified SliceAttr
@@ -792,6 +769,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Check if this layout is equal to another layout.
bool isEqualTo(const xegpu::DistributeLayoutAttr &other);
+ /// Check if this layout is a transpose of another layout.
+ bool isTransposeOf(const xegpu::DistributeLayoutAttr &other, ArrayRef<int64_t> perm, const xegpu::LayoutKind kind);
+
/// Drop the slice dims to get the original layout.
SliceAttr dropSliceDims(ArrayRef<int64_t> sliceDimsToDrop);
}];
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h
index 3482d1b9401b..2ae0ef3ae852 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h
@@ -81,6 +81,11 @@ DistributeLayoutAttr
inferMultiReductionSourceLayout(DistributeLayoutAttr resLayout,
SmallVector<int64_t> reduceDims);
+/// Infers the source layout attribute for a transpose operation given the
+/// result layout attribute and permutation.
+DistributeLayoutAttr inferTransposeSourceLayout(DistributeLayoutAttr resLayout,
+ ArrayRef<int64_t> permutation);
+
/// Infers the source layout attribute for a bitcast operation given the
/// result layout attribute, result element type bitwidth, and source element
/// type bitwidth.
diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
index e0e1be35e4e1..e7f561e8a4d6 100644
--- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
+++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
@@ -311,32 +311,13 @@ LogicalResult IndexCastOpLowering<OpTy, ExtCastTy>::matchAndRewrite(
if constexpr (std::is_same_v<ExtCastTy, LLVM::ZExtOp>)
isNonNeg = op.getNonNeg();
- bool isExact = op.getExact();
-
- // Map exact to the appropriate overflow flag(s) for truncation:
- // index_cast (signed) exact -> trunc nsw
- // index_castui (unsigned) exact -> trunc nuw
- // index_castui nneg exact -> trunc nuw nsw
- LLVM::IntegerOverflowFlags truncOverflow = LLVM::IntegerOverflowFlags::none;
- if (isExact) {
- if constexpr (std::is_same_v<ExtCastTy, LLVM::SExtOp>) {
- truncOverflow = LLVM::IntegerOverflowFlags::nsw;
- } else {
- truncOverflow = LLVM::IntegerOverflowFlags::nuw;
- if (isNonNeg)
- truncOverflow |= LLVM::IntegerOverflowFlags::nsw;
- }
- }
-
// Handle the scalar and 1D vector cases.
Type operandType = adaptor.getIn().getType();
if (!isa<LLVM::LLVMArrayType>(operandType)) {
Type targetType = this->typeConverter->convertType(resultType);
if (targetBits < sourceBits) {
- auto truncOp = rewriter.replaceOpWithNewOp<LLVM::TruncOp>(
- op, targetType, adaptor.getIn());
- if (isExact)
- truncOp.setOverflowFlags(truncOverflow);
+ rewriter.replaceOpWithNewOp<LLVM::TruncOp>(op, targetType,
+ adaptor.getIn());
} else {
auto extOp = rewriter.replaceOpWithNewOp<ExtCastTy>(op, targetType,
adaptor.getIn());
@@ -354,16 +335,15 @@ LogicalResult IndexCastOpLowering<OpTy, ExtCastTy>::matchAndRewrite(
[&](Type llvm1DVectorTy, ValueRange operands) -> Value {
typename OpTy::Adaptor adaptor(operands);
if (targetBits < sourceBits) {
- auto truncOp = LLVM::TruncOp::create(rewriter, op.getLoc(),
- llvm1DVectorTy, adaptor.getIn());
- if (isExact)
- truncOp.setOverflowFlags(truncOverflow);
- return truncOp;
+ return LLVM::TruncOp::create(rewriter, op.getLoc(), llvm1DVectorTy,
+ adaptor.getIn());
}
auto extOp = ExtCastTy::create(rewriter, op.getLoc(), llvm1DVectorTy,
adaptor.getIn());
- if constexpr (std::is_same_v<ExtCastTy, LLVM::ZExtOp>)
- extOp.setNonNeg(isNonNeg);
+ if constexpr (std::is_same_v<ExtCastTy, LLVM::ZExtOp>) {
+ if (isNonNeg)
+ extOp.setNonNeg(true);
+ }
return extOp;
},
rewriter);
diff --git a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td
index f26af4816ce8..fb9c16db9143 100644
--- a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td
+++ b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td
@@ -288,38 +288,31 @@ def SelectI1ToNot :
// IndexCastOp
//===----------------------------------------------------------------------===//
-// index_cast(index_cast(x, exact)) -> x, if dstType == srcType.
-// The inner exact guarantees the iN -> index conversion is lossless,
-// so the roundtrip through index preserves the value.
+// index_cast(index_cast(x)) -> x, if dstType == srcType.
def IndexCastOfIndexCast :
- Pat<(Arith_IndexCastOp:$res (Arith_IndexCastOp $x, $exact1), $exact2),
+ Pat<(Arith_IndexCastOp:$res (Arith_IndexCastOp $x)),
(replaceWithValue $x),
- [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x),
- (Constraint<CPred<"(bool)$0">> $exact1)]>;
+ [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x)]>;
// index_cast(extsi(x)) -> index_cast(x)
def IndexCastOfExtSI :
- Pat<(Arith_IndexCastOp (Arith_ExtSIOp $x), $exact),
- (Arith_IndexCastOp $x, $exact)>;
+ Pat<(Arith_IndexCastOp (Arith_ExtSIOp $x)), (Arith_IndexCastOp $x)>;
//===----------------------------------------------------------------------===//
// IndexCastUIOp
//===----------------------------------------------------------------------===//
-// index_castui(index_castui(x, exact)) -> x, if dstType == srcType.
-// The inner exact guarantees the iN -> index conversion is lossless,
-// so the roundtrip through index preserves the value.
+// index_castui(index_castui(x)) -> x, if dstType == srcType.
def IndexCastUIOfIndexCastUI :
- Pat<(Arith_IndexCastUIOp:$res
- (Arith_IndexCastUIOp $x, $nneg1, $exact1), $nneg2, $exact2),
+ Pat<(Arith_IndexCastUIOp:$res (Arith_IndexCastUIOp $x, $nneg1), $nneg2),
(replaceWithValue $x),
- [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x),
- (Constraint<CPred<"static_cast<bool>($0)">> $exact1)]>;
+ [(Constraint<CPred<"$0.getType() == $1.getType()">> $res, $x)]>;
// index_castui(extui(x)) -> index_castui(x)
def IndexCastUIOfExtUI :
- Pat<(Arith_IndexCastUIOp (Arith_ExtUIOp $x, $nneg1), $nneg2, $exact),
- (Arith_IndexCastUIOp $x, $nneg1, $exact)>;
+ Pat<(Arith_IndexCastUIOp (Arith_ExtUIOp $x, $nneg1), $nneg2),
+ (Arith_IndexCastUIOp $x, $nneg1)>;
+
//===----------------------------------------------------------------------===//
// BitcastOp
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
index 881234cdd619..85dba70dbde1 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
@@ -16,6 +16,7 @@
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Region.h"
@@ -326,6 +327,241 @@ void ReductionCombineOp::getEffects(
}
//===----------------------------------------------------------------------===//
+// ComputeRegionOp
+//===----------------------------------------------------------------------===//
+
+static ParWidthOp getParWidthOpForLaunchArg(ComputeRegionOp op,
+ GPUParallelDimAttr parDim) {
+ for (auto launchArg : op.getLaunchArgs()) {
+ auto parOp = launchArg.getDefiningOp<ParWidthOp>();
+ if (!parOp)
+ continue;
+ auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim());
+ if (launchArgDim == parDim)
+ return parOp;
+ }
+ return nullptr;
+}
+
+std::optional<Value> ComputeRegionOp::getLaunchArg(GPUParallelDimAttr parDim) {
+ if (auto parWidthOp = getParWidthOpForLaunchArg(*this, parDim))
+ return parWidthOp.getResult();
+ return {};
+}
+
+std::optional<Value>
+ComputeRegionOp::getKnownLaunchArg(GPUParallelDimAttr parDim) {
+ if (auto parWidthOp = getParWidthOpForLaunchArg(*this, parDim))
+ if (parWidthOp.getLaunchArg())
+ return parWidthOp.getLaunchArg();
+ return {};
+}
+
+std::optional<uint64_t>
+ComputeRegionOp::getKnownConstantLaunchArg(GPUParallelDimAttr parDim) {
+ auto knownParWidth = getKnownLaunchArg(parDim);
+ if (knownParWidth.has_value())
+ return getConstantIntValue(knownParWidth.value());
+ return {};
+}
+
+BlockArgument ComputeRegionOp::appendInputArg(Value value) {
+ getInputArgsMutable().append(value);
+ return getBody()->addArgument(value.getType(), getLoc());
+}
+
+bool ComputeRegionOp::isEffectivelySerial() {
+ auto *ctx = getContext();
+
+ if (getLaunchArg(GPUParallelDimAttr::seqDim(ctx)))
+ return true;
+
+ auto checkDim = [&](GPUParallelDimAttr dim) -> bool {
+ auto val = getKnownConstantLaunchArg(dim);
+ return val && *val == 1;
+ };
+
+ return checkDim(GPUParallelDimAttr::threadXDim(ctx)) &&
+ checkDim(GPUParallelDimAttr::threadYDim(ctx)) &&
+ checkDim(GPUParallelDimAttr::threadZDim(ctx)) &&
+ checkDim(GPUParallelDimAttr::blockXDim(ctx)) &&
+ checkDim(GPUParallelDimAttr::blockYDim(ctx)) &&
+ checkDim(GPUParallelDimAttr::blockZDim(ctx));
+}
+
+BlockArgument ComputeRegionOp::parDimToWidth(GPUParallelDimAttr parDim) {
+ for (auto [pos, launchArg] : llvm::enumerate(getLaunchArgs())) {
+ auto parOp = launchArg.getDefiningOp<ParWidthOp>();
+ assert(parOp);
+ auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim());
+ if (launchArgDim == parDim) {
+ assert(pos < getRegion().front().getNumArguments() &&
+ "launch arg position out of range");
+ return getRegion().front().getArgument(pos);
+ }
+ }
+ llvm_unreachable("attempting to get unspecified parDim");
+}
+
+SmallVector<GPUParallelDimAttr> ComputeRegionOp::getLaunchParDims() {
+ SmallVector<GPUParallelDimAttr> parDims;
+ for (auto launchArg : getLaunchArgs()) {
+ auto parOp = launchArg.getDefiningOp<ParWidthOp>();
+ auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim());
+ int64_t dimInt = launchArgDim.getValue().getInt();
+ parDims.push_back(intToParDim(getContext(), dimInt));
+ }
+ return parDims;
+}
+
+Value ComputeRegionOp::getOperand(BlockArgument blockArg) {
+ unsigned argNumber = blockArg.getArgNumber();
+ unsigned numLaunchArgs = getLaunchArgs().size();
+ unsigned numInputArgs = getInputArgs().size();
+ assert(argNumber < (numLaunchArgs + numInputArgs) &&
+ "invalid block argument");
+ if (argNumber < numLaunchArgs)
+ return getLaunchArgs()[argNumber];
+ return getInputArgs()[argNumber - numLaunchArgs];
+}
+
+BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) {
+ return parDimToWidth(GPUParallelDimAttr::get(getContext(), processor));
+}
+
+LogicalResult ComputeRegionOp::verify() {
+ unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size();
+ unsigned actualBlockArgs = getRegion().front().getNumArguments();
+ if (expectedBlockArgs != actualBlockArgs)
+ return emitOpError("expected ")
+ << expectedBlockArgs << " block arguments (launch + input), got "
+ << actualBlockArgs;
+
+ return success();
+}
+
+void ComputeRegionOp::print(OpAsmPrinter &p) {
+ ValueRange regionArgs = getBody()->getArguments();
+ ValueRange launchArgs = getLaunchArgs();
+ ValueRange inputArgs = getInputArgs();
+
+ assert(regionArgs.size() == (launchArgs.size() + inputArgs.size()) &&
+ "region args mismatch");
+
+ if (getStream())
+ p << " stream(" << getStream() << " : " << getStream().getType() << ")";
+
+ size_t i = 0;
+ if (!launchArgs.empty()) {
+ p << " launch(";
+ for (size_t j = 0; j < launchArgs.size(); ++j, ++i) {
+ p << regionArgs[i] << " = " << launchArgs[j];
+ if (j < launchArgs.size() - 1)
+ p << ", ";
+ }
+ p << ")";
+ }
+ if (!inputArgs.empty()) {
+ p << " ins(";
+ for (size_t j = 0; j < inputArgs.size(); ++j, ++i) {
+ p << regionArgs[i] << " = " << inputArgs[j];
+ if (j < inputArgs.size() - 1)
+ p << ", ";
+ }
+ p << ") : (";
+ for (size_t j = 0; j < inputArgs.size(); ++j) {
+ p << inputArgs[j].getType();
+ if (j < inputArgs.size() - 1)
+ p << ", ";
+ }
+ p << ")";
+ }
+ p.printOptionalArrowTypeList(getResultTypes());
+ p << " ";
+ p.printRegion(getRegion(), /*printEntryBlockArgs=*/false);
+ p.printOptionalAttrDict((*this)->getAttrs(),
+ /*elidedAttrs=*/getOperandSegmentSizeAttr());
+}
+
+ParseResult ComputeRegionOp::parse(OpAsmParser &parser,
+ OperationState &result) {
+ auto &builder = parser.getBuilder();
+
+ SmallVector<OpAsmParser::Argument> regionArgs;
+ OpAsmParser::UnresolvedOperand streamOperand;
+ Type streamType;
+ SmallVector<OpAsmParser::UnresolvedOperand> launchOperands;
+ SmallVector<OpAsmParser::UnresolvedOperand> inputOperands;
+ SmallVector<Type> types;
+
+ bool hasStream = false;
+ if (succeeded(parser.parseOptionalKeyword("stream"))) {
+ hasStream = true;
+ if (parser.parseLParen() || parser.parseOperand(streamOperand) ||
+ parser.parseColon() || parser.parseType(streamType) ||
+ parser.parseRParen())
+ return failure();
+ }
+
+ if (succeeded(parser.parseOptionalKeyword("launch"))) {
+ if (parser.parseAssignmentList(regionArgs, launchOperands))
+ return failure();
+ auto parWidthType = acc::ParWidthType::get(builder.getContext());
+ for (size_t i = 0; i < regionArgs.size(); ++i)
+ types.push_back(parWidthType);
+ }
+
+ if (succeeded(parser.parseOptionalKeyword("ins"))) {
+ if (parser.parseAssignmentList(regionArgs, inputOperands) ||
+ parser.parseColon() || parser.parseLParen() ||
+ parser.parseTypeList(types) || parser.parseRParen())
+ return failure();
+ }
+
+ if (parser.parseOptionalArrowTypeList(result.types))
+ return failure();
+
+ for (auto [iterArg, type] : llvm::zip_equal(regionArgs, types))
+ iterArg.type = type;
+
+ Region *body = result.addRegion();
+ if (parser.parseRegion(*body, regionArgs))
+ return failure();
+
+ const size_t numLaunchOperands = launchOperands.size();
+ const size_t numInputOperands = inputOperands.size();
+ assert(numLaunchOperands + numInputOperands == regionArgs.size() &&
+ "compute region args mismatch");
+
+ result.addAttribute(
+ ComputeRegionOp::getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr({static_cast<int32_t>(numLaunchOperands),
+ static_cast<int32_t>(numInputOperands),
+ hasStream ? 1 : 0}));
+
+ for (size_t i = 0; i < numLaunchOperands; ++i) {
+ if (parser.resolveOperand(launchOperands[i], types[i], result.operands))
+ return failure();
+ }
+
+ for (size_t i = numLaunchOperands; i < regionArgs.size(); ++i) {
+ if (parser.resolveOperand(inputOperands[i - numLaunchOperands], types[i],
+ result.operands))
+ return failure();
+ }
+
+ if (hasStream) {
+ if (parser.resolveOperand(streamOperand, streamType, result.operands))
+ return failure();
+ }
+
+ if (parser.parseOptionalAttrDict(result.attributes))
+ return failure();
+
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
// GPUParallelDimAttr
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp b/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp
index ea7ee715189e..7dbde227b2fa 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/OffloadLiveInValueCanonicalization.cpp
@@ -135,7 +135,7 @@ static bool isRematerializationCandidate(Value val,
// Trace through view-like operations to find the original value.
Value origVal = getOriginalValue(val);
Operation *definingOp = origVal.getDefiningOp();
- if (!definingOp)
+ if (!definingOp && !(definingOp = val.getDefiningOp()))
return false;
LLVM_DEBUG(llvm::dbgs() << "\tChecking candidate: " << *definingOp << "\n");
@@ -181,6 +181,20 @@ static bool isRematerializationCandidate(Value val,
}
}
+ // An op implementing both ViewLikeOpInterface and
+ // OutlineRematerializationOpInterface may have been traced through by
+ // getOriginalValue. If the traced op is not a candidate, check the direct
+ // defining op of the live-in value.
+ if (origVal != val) {
+ definingOp = val.getDefiningOp();
+ if (definingOp &&
+ isa<acc::OutlineRematerializationOpInterface>(definingOp)) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "\t\t-> OutlineRematerializationOpInterface (direct)\n");
+ return true;
+ }
+ }
+
LLVM_DEBUG(llvm::dbgs() << "\t\t-> not a candidate\n");
return false;
}
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index c082600ec27d..4d412dd92e1b 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -618,24 +618,97 @@ DistributeLayoutAttr LayoutAttr::collapseDims(SmallVector<int64_t> dimGroup) {
SmallVector<int32_t> laneLayout32(laneLayout.begin(), laneLayout.end());
SmallVector<int32_t> laneData32(laneData.begin(), laneData.end());
+ auto toAttr = [&](ArrayRef<int32_t> v) -> DenseI32ArrayAttr {
+ return v.empty() ? nullptr : DenseI32ArrayAttr::get(getContext(), v);
+ };
+
auto collapsedLayout = xegpu::LayoutAttr::get(
- getContext(),
- sgLayout32.empty() ? DenseI32ArrayAttr()
- : DenseI32ArrayAttr::get(getContext(), sgLayout32),
- sgData32.empty() ? DenseI32ArrayAttr()
- : DenseI32ArrayAttr::get(getContext(), sgData32),
- instData32.empty() ? DenseI32ArrayAttr()
- : DenseI32ArrayAttr::get(getContext(), instData32),
- laneLayout32.empty() ? DenseI32ArrayAttr()
- : DenseI32ArrayAttr::get(getContext(), laneLayout32),
- laneData32.empty() ? DenseI32ArrayAttr()
- : DenseI32ArrayAttr::get(getContext(), laneData32),
- collapsedOrder.empty()
- ? DenseI32ArrayAttr()
- : DenseI32ArrayAttr::get(getContext(), collapsedOrder));
+ getContext(), toAttr(sgLayout32), toAttr(sgData32), toAttr(instData32),
+ toAttr(laneLayout32), toAttr(laneData32), toAttr(collapsedOrder));
return collapsedLayout;
}
+// Derive a new layout by transposing the layout using `permutation`.
+DistributeLayoutAttr LayoutAttr::transposeDims(ArrayRef<int64_t> permutation) {
+
+ SmallVector<int64_t> origSgLayout = getEffectiveSgLayoutAsInt();
+ SmallVector<int64_t> origSgData = getEffectiveSgDataAsInt();
+ SmallVector<int64_t> origInstData = getEffectiveInstDataAsInt();
+ SmallVector<int64_t> origLaneLayout = getEffectiveLaneLayoutAsInt();
+ SmallVector<int64_t> origLaneData = getEffectiveLaneDataAsInt();
+ SmallVector<int64_t> origOrder = getEffectiveOrderAsInt();
+
+ SmallVector<int32_t> sgLayout;
+ SmallVector<int32_t> sgData;
+ SmallVector<int32_t> instData;
+ SmallVector<int32_t> laneLayout;
+ SmallVector<int32_t> laneData;
+ SmallVector<int32_t> order;
+
+ for (int64_t idx : permutation) {
+ if (!origLaneLayout.empty()) {
+ laneLayout.push_back(static_cast<int32_t>(origLaneLayout[idx]));
+ laneData.push_back(static_cast<int32_t>(origLaneData[idx]));
+ }
+ if (!origInstData.empty())
+ instData.push_back(static_cast<int32_t>(origInstData[idx]));
+ if (!origSgLayout.empty()) {
+ sgLayout.push_back(static_cast<int32_t>(origSgLayout[idx]));
+ sgData.push_back(static_cast<int32_t>(origSgData[idx]));
+ }
+ order.push_back(static_cast<int32_t>(origOrder[idx]));
+ }
+ if (origLaneLayout.empty() && origSgLayout.empty())
+ order.clear();
+
+ auto toAttr = [&](ArrayRef<int32_t> v) -> DenseI32ArrayAttr {
+ return v.empty() ? nullptr : DenseI32ArrayAttr::get(getContext(), v);
+ };
+ return xegpu::LayoutAttr::get(getContext(), toAttr(sgLayout), toAttr(sgData),
+ toAttr(instData), toAttr(laneLayout),
+ toAttr(laneData), toAttr(order));
+}
+
+/// Check if this layout is a transpose of another layout.
+bool LayoutAttr::isTransposeOf(const xegpu::DistributeLayoutAttr &other,
+ ArrayRef<int64_t> perm,
+ const xegpu::LayoutKind kind) {
+ if (!other)
+ return false;
+ if (getRank() != other.getRank() ||
+ perm.size() != static_cast<size_t>(getRank()))
+ return false;
+ if (!isPermutationVector(perm))
+ return false;
+ auto checkTranspose = [](ArrayRef<int64_t> dst, ArrayRef<int64_t> src,
+ ArrayRef<int64_t> perm) {
+ for (const auto &ta : llvm::enumerate(perm)) {
+ if (src[ta.index()] != dst[ta.value()])
+ return false;
+ }
+ return true;
+ };
+ if (kind == xegpu::LayoutKind::Subgroup)
+ return checkTranspose(getEffectiveSgLayoutAsInt(),
+ other.getEffectiveSgLayoutAsInt(), perm) &&
+ checkTranspose(getEffectiveSgDataAsInt(),
+ other.getEffectiveSgDataAsInt(), perm) &&
+ checkTranspose(getEffectiveOrderAsInt(),
+ other.getEffectiveOrderAsInt(), perm);
+ if (kind == xegpu::LayoutKind::InstData)
+ return checkTranspose(getEffectiveInstDataAsInt(),
+ other.getEffectiveInstDataAsInt(), perm);
+ if (kind == xegpu::LayoutKind::Lane)
+ return checkTranspose(getEffectiveLaneLayoutAsInt(),
+ other.getEffectiveLaneLayoutAsInt(), perm) &&
+ checkTranspose(getEffectiveLaneDataAsInt(),
+ other.getEffectiveLaneDataAsInt(), perm) &&
+ checkTranspose(getEffectiveOrderAsInt(),
+ other.getEffectiveOrderAsInt(), perm);
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_SliceAttr
//===----------------------------------------------------------------------===//
@@ -881,6 +954,62 @@ DistributeLayoutAttr SliceAttr::collapseDims(SmallVector<int64_t> dimGroup) {
DenseI64ArrayAttr::get(getContext(), sliceDims));
}
+SmallVector<int64_t> getPermForParentLayout(ArrayRef<int64_t> sliceDims,
+ ArrayRef<int64_t> permutation) {
+ SmallVector<int64_t> sortedSliceDims = llvm::to_vector(sliceDims);
+ llvm::sort(sortedSliceDims);
+
+ for (size_t i = 1; i < sortedSliceDims.size(); ++i) {
+ assert((sortedSliceDims[i] == sortedSliceDims[i - 1] + 1) &&
+ "slice dims non consecutive, cannot be transposed");
+ }
+
+ SmallVector<int64_t> permForParent;
+ if (sortedSliceDims.front() == 0) {
+ // Example: sliceDims.size() = 2, permutation= {1, 0}
+ // result: {3, 2, 1, 0}.
+ for (int64_t dim : permutation)
+ permForParent.push_back(dim + sortedSliceDims.size());
+ for (int64_t i = sortedSliceDims.size() - 1; i >= 0; --i)
+ permForParent.push_back(i);
+ } else {
+ // Example: sliceDims.size() = 2, permutation = {0, 1}
+ // result: {3, 2, 0, 1}.
+ for (int64_t i = sortedSliceDims.size() - 1; i >= 0; --i)
+ permForParent.push_back(i + permutation.size());
+ for (int64_t dim : permutation)
+ permForParent.push_back(dim);
+ }
+ return permForParent;
+}
+
+// Derive a new layout by transposing the layout using `permutation`.
+DistributeLayoutAttr SliceAttr::transposeDims(ArrayRef<int64_t> permutation) {
+ SmallVector<int64_t> sliceDims = llvm::to_vector(getDims().asArrayRef());
+ DistributeLayoutAttr parent = getParent();
+ SmallVector<int64_t> permForParent =
+ getPermForParentLayout(sliceDims, permutation);
+ auto transposedParent = parent.transposeDims(permForParent);
+ return SliceAttr::get(getContext(), transposedParent,
+ DenseI64ArrayAttr::get(getContext(), sliceDims));
+}
+
+/// Check if this layout is a transpose of another layout.
+bool SliceAttr::isTransposeOf(const xegpu::DistributeLayoutAttr &other,
+ ArrayRef<int64_t> perm,
+ const xegpu::LayoutKind kind) {
+ // other must be a SliceAttr with the same slice dims.
+ auto otherSlice = dyn_cast<xegpu::SliceAttr>(other);
+ if (!otherSlice || getDims() != otherSlice.getDims())
+ return false;
+ // Check whether the parent layouts are transposes of each other.
+ SmallVector<int64_t> sliceDims = llvm::to_vector(getDims().asArrayRef());
+ DistributeLayoutAttr parent = getParent();
+ SmallVector<int64_t> permForParent = getPermForParentLayout(sliceDims, perm);
+ auto otherParent = otherSlice.getParent();
+ return parent.isTransposeOf(otherParent, permForParent, kind);
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_RangeAttr
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
index 7aa186bb2222..432886db29d2 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
@@ -178,6 +178,14 @@ xegpu::inferMultiReductionSourceLayout(xegpu::DistributeLayoutAttr resLayout,
return sliceLayout.getParent();
}
+/// Infers the source layout attribute for a transpose operation given the
+/// result layout attribute and permutation.
+xegpu::DistributeLayoutAttr
+xegpu::inferTransposeSourceLayout(xegpu::DistributeLayoutAttr resLayout,
+ ArrayRef<int64_t> permutation) {
+ return resLayout.transposeDims(permutation);
+}
+
/// Infers the source layout attribute for a bitcast operation given the
/// result layout attribute, result element type bitwidth, and source element
/// type bitwidth.
@@ -1144,6 +1152,16 @@ xegpu::DistributeLayoutAttr xegpu::getConsumerLayoutAt(OpOperand &operand) {
if (idx == 1)
return resLayout;
}
+
+ // For vector::TransposeOp, infer source layout from result layout using
+ // permutation.
+ if (auto transpose = dyn_cast<vector::TransposeOp>(op)) {
+ if (!resLayout)
+ return xegpu::DistributeLayoutAttr();
+ return xegpu::inferTransposeSourceLayout(resLayout,
+ transpose.getPermutation());
+ }
+
// For elementwise operations, all operands must have the same layout as the
// result.
if (OpTrait::hasElementwiseMappableTraits(op) && op->getNumResults() == 1) {
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 7f7e8d6ad773..ab8f7e768ec1 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -912,9 +912,12 @@ void LayoutInfoPropagation::visitTransposeOp(
LayoutInfo resultLayout = results[0]->getValue();
if (!resultLayout.isAssigned())
return;
- LayoutInfo newLayout = resultLayout.transpose(transpose.getPermutation());
+ auto consumerLayoutAttr =
+ dyn_cast<xegpu::DistributeLayoutAttr>(resultLayout.get());
+ auto srcLayoutAttr = xegpu::inferTransposeSourceLayout(
+ consumerLayoutAttr, transpose.getPermutation());
// Propagate the new layout to the vector operand.
- propagateIfChanged(operands[0], operands[0]->meet(newLayout));
+ propagateIfChanged(operands[0], operands[0]->meet(LayoutInfo(srcLayoutAttr)));
}
/// For vector::BitCastOp, the lane_data of the source layout is changed based
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index bf9fded8a3ab..38bc95d39c2c 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -1963,7 +1963,8 @@ struct VectorTransposeDistribution final : public gpu::WarpDistributionPattern {
"does not have 2D layout");
ArrayRef<int64_t> perm = transposeOp.getPermutation();
// Result layout must be a transpose of source layout.
- if (!resultLayout.isTransposeOf(sourceLayout, perm))
+ if (!resultLayout.isTransposeOf(sourceLayout, perm,
+ xegpu::LayoutKind::Lane))
return rewriter.notifyMatchFailure(
transposeOp,
"the source or result vector layouts must be 2D transposes of each "
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 30e4a956a0ad..139a30e76854 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -1532,7 +1532,8 @@ struct WgToSgVectorTransposeOp
// Check that sgLayout, sgData & order are properly transposed for source
// and result
- if (!layout.isTransposeOf(sourceLayout, permutation))
+ if (!layout.isTransposeOf(sourceLayout, permutation,
+ xegpu::LayoutKind::Subgroup))
return rewriter.notifyMatchFailure(
op, "Result layout is not a valid transpose of source layout "
"according to permutation");
@@ -1540,13 +1541,13 @@ struct WgToSgVectorTransposeOp
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
VectorType newResultType =
VectorType::get(sgShape, resultType.getElementType());
+
SmallVector<Value> newTransposeOps;
for (auto src : adaptor.getVector()) {
auto newTranspose = vector::TransposeOp::create(
rewriter, op.getLoc(), newResultType, src, permutation);
newTransposeOps.push_back(newTranspose.getResult());
}
-
rewriter.replaceOpWithMultiple(op, {newTransposeOps});
return success();
}
diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
index 2845df23293d..47069906fa11 100644
--- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
+++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
@@ -160,58 +160,6 @@ func.func @index_castui_nneg_not_set(%arg0: i1) {
// -----
-// index_cast exact on truncation lowers to trunc nsw (signed semantics).
-// CHECK-LABEL: @index_cast_exact_trunc
-func.func @index_cast_exact_trunc(%arg0: index) {
-// CHECK: llvm.trunc %{{.*}} overflow<nsw> : i{{.*}} to i1
- %0 = arith.index_cast %arg0 exact : index to i1
- return
-}
-
-// -----
-
-// index_cast exact on widening: exact is vacuously true, sext has no flag.
-// CHECK-LABEL: @index_cast_exact_ext
-func.func @index_cast_exact_ext(%arg0: i1) {
-// CHECK: llvm.sext %{{.*}} : i1 to i{{.*}}
-// CHECK-NOT: nsw
- %0 = arith.index_cast %arg0 exact : i1 to index
- return
-}
-
-// -----
-
-// index_castui exact on truncation lowers to trunc nuw (unsigned semantics).
-// CHECK-LABEL: @index_castui_exact_trunc
-func.func @index_castui_exact_trunc(%arg0: index) {
-// CHECK: llvm.trunc %{{.*}} overflow<nuw> : i{{.*}} to i1
- %0 = arith.index_castui %arg0 exact : index to i1
- return
-}
-
-// -----
-
-// index_castui nneg exact on truncation lowers to trunc nuw nsw.
-// CHECK-LABEL: @index_castui_nneg_exact_trunc
-func.func @index_castui_nneg_exact_trunc(%arg0: index) {
-// CHECK: llvm.trunc %{{.*}} overflow<nsw, nuw> : i{{.*}} to i1
- %0 = arith.index_castui %arg0 nneg exact : index to i1
- return
-}
-
-// -----
-
-// index_castui exact on widening: exact is vacuously true, zext has no flag.
-// CHECK-LABEL: @index_castui_exact_ext
-func.func @index_castui_exact_ext(%arg0: i1) {
-// CHECK: llvm.zext %{{.*}} : i1 to i{{.*}}
-// CHECK-NOT: nuw
- %0 = arith.index_castui %arg0 exact : i1 to index
- return
-}
-
-// -----
-
// Checking conversion of signed integer types to floating point.
// CHECK-LABEL: @sitofp
func.func @sitofp(%arg0 : i32, %arg1 : i64) {
diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir
index 326afcae696c..035c10e78bf9 100644
--- a/mlir/test/Dialect/Arith/canonicalize.mlir
+++ b/mlir/test/Dialect/Arith/canonicalize.mlir
@@ -588,15 +588,6 @@ func.func @indexCastOfSignExtend(%arg0: i8) -> index {
return %idx : index
}
-// CHECK-LABEL: @indexCastOfSignExtend_exact
-// CHECK: %[[res:.+]] = arith.index_cast %arg0 exact : i8 to index
-// CHECK: return %[[res]]
-func.func @indexCastOfSignExtend_exact(%arg0: i8) -> index {
- %ext = arith.extsi %arg0 : i8 to i16
- %idx = arith.index_cast %ext exact : i16 to index
- return %idx : index
-}
-
// CHECK-LABEL: @indexCastUIOfUnsignedExtend
// CHECK: %[[res:.+]] = arith.index_castui %arg0 : i8 to index
// CHECK: return %[[res]]
@@ -625,61 +616,6 @@ func.func @indexCastUIOfUnsignedExtend_nneg_on_castui(%arg0: i8) -> index {
return %idx : index
}
-// CHECK-LABEL: @indexCastUIOfUnsignedExtend_exact
-// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact : i8 to index
-// CHECK: return %[[res]]
-func.func @indexCastUIOfUnsignedExtend_exact(%arg0: i8) -> index {
- %ext = arith.extui %arg0 : i8 to i16
- %idx = arith.index_castui %ext exact : i16 to index
- return %idx : index
-}
-
-// CHECK-LABEL: @indexCastUIOfUnsignedExtend_nneg_exact
-// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact nneg : i8 to index
-// CHECK: return %[[res]]
-func.func @indexCastUIOfUnsignedExtend_nneg_exact(%arg0: i8) -> index {
- %ext = arith.extui %arg0 nneg : i8 to i16
- %idx = arith.index_castui %ext exact : i16 to index
- return %idx : index
-}
-
-// index_castui(index_castui(x)) -> x only when exact is on the inner cast.
-// CHECK-LABEL: @indexCastUIOfIndexCastUI_no_exact
-// CHECK: arith.index_castui
-// CHECK: arith.index_castui
-func.func @indexCastUIOfIndexCastUI_no_exact(%arg0: i32) -> i32 {
- %idx = arith.index_castui %arg0 : i32 to index
- %res = arith.index_castui %idx : index to i32
- return %res : i32
-}
-
-// CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_inner
-// CHECK: return %arg0 : i32
-func.func @indexCastUIOfIndexCastUI_exact_inner(%arg0: i32) -> i32 {
- %idx = arith.index_castui %arg0 exact : i32 to index
- %res = arith.index_castui %idx : index to i32
- return %res : i32
-}
-
-// exact on outer only does NOT trigger the fold (outer exact on widening
-// is vacuously true and does not guarantee the inner truncation is lossless).
-// CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_outer
-// CHECK: arith.index_castui
-// CHECK: arith.index_castui
-func.func @indexCastUIOfIndexCastUI_exact_outer(%arg0: i32) -> i32 {
- %idx = arith.index_castui %arg0 : i32 to index
- %res = arith.index_castui %idx exact : index to i32
- return %res : i32
-}
-
-// CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_both
-// CHECK: return %arg0 : i32
-func.func @indexCastUIOfIndexCastUI_exact_both(%arg0: i32) -> i32 {
- %idx = arith.index_castui %arg0 exact : i32 to index
- %res = arith.index_castui %idx exact : index to i32
- return %res : i32
-}
-
// CHECK-LABEL: @indexCastFold
// CHECK: %[[res:.*]] = arith.constant -2 : index
// CHECK: return %[[res]]
diff --git a/mlir/test/Dialect/Arith/ops.mlir b/mlir/test/Dialect/Arith/ops.mlir
index a9eabe97ebfc..9765db69d6dd 100644
--- a/mlir/test/Dialect/Arith/ops.mlir
+++ b/mlir/test/Dialect/Arith/ops.mlir
@@ -909,20 +909,6 @@ func.func @test_index_cast_scalable_vector1(%arg0 : vector<[8]xindex>) -> vector
return %0 : vector<[8]xi64>
}
-// CHECK-LABEL: test_index_cast_exact
-// CHECK: arith.index_cast %{{.*}} exact : i32 to index
-func.func @test_index_cast_exact(%arg0 : i32) -> index {
- %0 = arith.index_cast %arg0 exact : i32 to index
- return %0 : index
-}
-
-// CHECK-LABEL: test_index_cast_exact_vector
-// CHECK: arith.index_cast %{{.*}} exact : vector<8xi32> to vector<8xindex>
-func.func @test_index_cast_exact_vector(%arg0 : vector<8xi32>) -> vector<8xindex> {
- %0 = arith.index_cast %arg0 exact : vector<8xi32> to vector<8xindex>
- return %0 : vector<8xindex>
-}
-
// CHECK-LABEL: test_index_castui0
func.func @test_index_castui0(%arg0 : i32) -> index {
%0 = arith.index_castui %arg0 : i32 to index
@@ -985,20 +971,6 @@ func.func @test_index_castui_nneg_vector(%arg0 : vector<8xi32>) -> vector<8xinde
return %0 : vector<8xindex>
}
-// CHECK-LABEL: test_index_castui_exact
-// CHECK: arith.index_castui %{{.*}} exact : i32 to index
-func.func @test_index_castui_exact(%arg0 : i32) -> index {
- %0 = arith.index_castui %arg0 exact : i32 to index
- return %0 : index
-}
-
-// CHECK-LABEL: test_index_castui_nneg_exact
-// CHECK: arith.index_castui %{{.*}} exact nneg : i32 to index
-func.func @test_index_castui_nneg_exact(%arg0 : i32) -> index {
- %0 = arith.index_castui %arg0 nneg exact : i32 to index
- return %0 : index
-}
-
// CHECK-LABEL: test_bitcast0
func.func @test_bitcast0(%arg0 : i64) -> f64 {
%0 = arith.bitcast %arg0 : i64 to f64
diff --git a/mlir/test/Dialect/OpenACC/invalid-cg.mlir b/mlir/test/Dialect/OpenACC/invalid-cg.mlir
index bc2408ceafe8..f788e6c03bcc 100644
--- a/mlir/test/Dialect/OpenACC/invalid-cg.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid-cg.mlir
@@ -19,3 +19,23 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) {
scf.reduce
// expected-error@+1 {{expected one of ::mlir::gpu::Processor enum names}}
} {acc.par_dims = #acc<par_dims[gang]>}
+
+// -----
+
+// expected-note@+1 {{prior use here}}
+%c32 = arith.constant 32 : index
+// expected-error@+1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}}
+acc.compute_region launch(%arg0 = %c32) {
+ acc.yield
+} {origin = "acc.parallel"}
+
+// -----
+
+// Use generic form to introduce an extra block argument.
+%c64 = arith.constant 64 : index
+%w = acc.par_width %c64 {par_dim = #acc.par_dim<thread_x>}
+// expected-error@+1 {{'acc.compute_region' op expected 1 block arguments (launch + input), got 2}}
+"acc.compute_region"(%w) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({
+^bb0(%arg0: index, %extra: index):
+ "acc.yield"() : () -> ()
+}) {origin = "acc.parallel"} : (!acc.par_width) -> ()
diff --git a/mlir/test/Dialect/OpenACC/ops-cg.mlir b/mlir/test/Dialect/OpenACC/ops-cg.mlir
index e6453da21ed7..7a61261d97ba 100644
--- a/mlir/test/Dialect/OpenACC/ops-cg.mlir
+++ b/mlir/test/Dialect/OpenACC/ops-cg.mlir
@@ -77,3 +77,197 @@ func.func @par_dims_2d_grid() {
return
}
// CHECK: acc.par_dims = #acc<par_dims[block_y, thread_y]>
+
+// -----
+
+// CHECK-LABEL: func @compute_region_single_dim
+func.func @compute_region_single_dim(%data: memref<1024xf32>,
+ %result: memref<f32>) {
+ %c128 = arith.constant 128 : index
+ %copyin = acc.copyin varPtr(%data : memref<1024xf32>) -> memref<1024xf32>
+ %copy = acc.copyin varPtr(%result : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copy>}
+ acc.kernel_environment dataOperands(%copyin, %copy : memref<1024xf32>, memref<f32>) {
+ %w0 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>}
+ acc.compute_region launch(%arg0 = %w0)
+ ins(%arg1 = %copyin, %arg2 = %copy) : (memref<1024xf32>, memref<f32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c128_inner = arith.constant 128 : index
+ %cst = arith.constant 0.000000e+00 : f32
+ memref.store %cst, %arg2[] : memref<f32>
+ scf.parallel (%iv) = (%c0) to (%c128_inner) step (%c1) {
+ %val = memref.load %arg1[%iv] : memref<1024xf32>
+ %cur = memref.load %arg2[] : memref<f32>
+ %sum = arith.addf %cur, %val : f32
+ memref.store %sum, %arg2[] : memref<f32>
+ scf.reduce
+ } {acc.par_dims = #acc<par_dims[thread_x]>}
+ acc.yield
+ } {origin = "acc.parallel"}
+ }
+ acc.copyout accPtr(%copy : memref<f32>) to varPtr(%result : memref<f32>) {dataClause = #acc<data_clause acc_copy>}
+ acc.delete accPtr(%copyin : memref<1024xf32>)
+ return
+}
+// CHECK: %[[W:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>}
+// CHECK: acc.compute_region launch(%{{.*}} = %[[W]]) ins({{.*}}) : (memref<1024xf32>, memref<f32>) {
+// CHECK: acc.yield
+// CHECK: } {origin = "acc.parallel"}
+
+// -----
+
+// CHECK-LABEL: func @compute_region_two_dims
+func.func @compute_region_two_dims(%data: memref<8xi32>,
+ %reduction_var: memref<i32>) {
+ %c8 = arith.constant 8 : index
+ %c128 = arith.constant 128 : index
+ %copyin_data = acc.copyin varPtr(%data : memref<8xi32>) -> memref<8xi32>
+ %copyin_red = acc.copyin varPtr(%reduction_var : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_reduction>}
+ acc.kernel_environment dataOperands(%copyin_data, %copyin_red : memref<8xi32>, memref<i32>) {
+ %w0 = acc.par_width %c8 {par_dim = #acc.par_dim<block_x>}
+ %w1 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>}
+ acc.compute_region launch(%arg0 = %w0, %arg1 = %w1)
+ ins(%arg2 = %copyin_data, %arg3 = %copyin_red) : (memref<8xi32>, memref<i32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c8_inner = arith.constant 8 : index
+ %c0_i32 = arith.constant 0 : i32
+ %init = acc.reduction_init %arg3 <add> : memref<i32> {
+ %alloca = memref.alloca() : memref<i32>
+ memref.store %c0_i32, %alloca[] : memref<i32>
+ acc.yield %alloca : memref<i32>
+ }
+ scf.parallel (%iv) = (%c0) to (%c8_inner) step (%c1) {
+ %v = memref.load %arg2[%iv] : memref<8xi32>
+ %cur = memref.load %init[] : memref<i32>
+ %sum = arith.addi %cur, %v : i32
+ memref.store %sum, %init[] : memref<i32>
+ scf.reduce
+ } {acc.par_dims = #acc<par_dims[block_x, thread_x]>}
+ acc.reduction_combine %init into %arg3 <add> : memref<i32>
+ acc.yield
+ } {origin = "acc.parallel"}
+ }
+ acc.copyout accPtr(%copyin_red : memref<i32>) to varPtr(%reduction_var : memref<i32>) {dataClause = #acc<data_clause acc_reduction>}
+ acc.delete accPtr(%copyin_data : memref<8xi32>)
+ return
+}
+// CHECK: %[[W0:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<block_x>}
+// CHECK: %[[W1:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>}
+// CHECK: acc.compute_region launch(%{{.*}} = %[[W0]], %{{.*}} = %[[W1]]) ins({{.*}}) : (memref<8xi32>, memref<i32>) {
+// CHECK: acc.yield
+// CHECK: } {origin = "acc.parallel"}
+
+// -----
+
+// CHECK-LABEL: func @compute_region_unknown_width
+func.func @compute_region_unknown_width(%data: memref<100xf32>) {
+ %copyin = acc.copyin varPtr(%data : memref<100xf32>) -> memref<100xf32>
+ acc.kernel_environment dataOperands(%copyin : memref<100xf32>) {
+ %w0 = acc.par_width {par_dim = #acc.par_dim<thread_x>}
+ acc.compute_region launch(%arg0 = %w0)
+ ins(%arg1 = %copyin) : (memref<100xf32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c100 = arith.constant 100 : index
+ scf.parallel (%iv) = (%c0) to (%c100) step (%c1) {
+ scf.reduce
+ } {acc.par_dims = #acc<par_dims[thread_x]>}
+ acc.yield
+ } {origin = "acc.kernels"}
+ }
+ acc.delete accPtr(%copyin : memref<100xf32>)
+ return
+}
+// CHECK: %[[W:.*]] = acc.par_width {par_dim = #acc.par_dim<thread_x>}
+// CHECK: acc.compute_region launch(%{{.*}} = %[[W]]) ins({{.*}}) : (memref<100xf32>) {
+// CHECK: acc.yield
+// CHECK: } {origin = "acc.kernels"}
+
+// -----
+
+// CHECK-LABEL: func @compute_region_no_launch
+func.func @compute_region_no_launch(%a: memref<i32>, %b: memref<i32>) {
+ %copy_a = acc.copyin varPtr(%a : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_copy>}
+ %copy_b = acc.copyin varPtr(%b : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_copy>}
+ acc.kernel_environment dataOperands(%copy_a, %copy_b : memref<i32>, memref<i32>) {
+ acc.compute_region
+ ins(%arg0 = %copy_a, %arg1 = %copy_b) : (memref<i32>, memref<i32>) {
+ %c1 = arith.constant 1 : i32
+ memref.store %c1, %arg0[] : memref<i32>
+ memref.store %c1, %arg1[] : memref<i32>
+ acc.yield
+ } {origin = "acc.serial"}
+ }
+ acc.copyout accPtr(%copy_a : memref<i32>) to varPtr(%a : memref<i32>) {dataClause = #acc<data_clause acc_copy>}
+ acc.copyout accPtr(%copy_b : memref<i32>) to varPtr(%b : memref<i32>) {dataClause = #acc<data_clause acc_copy>}
+ return
+}
+// CHECK: acc.compute_region ins({{.*}}) : (memref<i32>, memref<i32>) {
+// CHECK: acc.yield
+// CHECK: } {origin = "acc.serial"}
+
+// -----
+
+// CHECK-LABEL: func @compute_region_launch_only
+func.func @compute_region_launch_only() {
+ %c32 = arith.constant 32 : index
+ %w0 = acc.par_width %c32 {par_dim = #acc.par_dim<thread_x>}
+ acc.compute_region launch(%arg0 = %w0) {
+ acc.yield
+ } {origin = "acc.parallel"}
+ return
+}
+// CHECK: %[[W:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>}
+// CHECK: acc.compute_region launch(%{{.*}} = %[[W]]) {
+// CHECK: acc.yield
+// CHECK: } {origin = "acc.parallel"}
+
+// -----
+
+// CHECK-LABEL: func @compute_region_all_fields
+// CHECK-SAME: (%{{.*}}: memref<1024xf32>, %[[STREAM:.*]]: !gpu.async.token)
+func.func @compute_region_all_fields(%data: memref<1024xf32>,
+ %stream: !gpu.async.token) {
+ %c128 = arith.constant 128 : index
+ %c8 = arith.constant 8 : index
+ %copyin = acc.copyin varPtr(%data : memref<1024xf32>) -> memref<1024xf32>
+ acc.kernel_environment dataOperands(%copyin : memref<1024xf32>) {
+ %w0 = acc.par_width %c8 {par_dim = #acc.par_dim<block_x>}
+ %w1 = acc.par_width %c128 {par_dim = #acc.par_dim<thread_x>}
+ acc.compute_region stream(%stream : !gpu.async.token)
+ launch(%arg0 = %w0, %arg1 = %w1)
+ ins(%arg2 = %copyin) : (memref<1024xf32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c1024 = arith.constant 1024 : index
+ scf.parallel (%iv) = (%c0) to (%c1024) step (%c1) {
+ scf.reduce
+ } {acc.par_dims = #acc<par_dims[block_x, thread_x]>}
+ acc.yield
+ } {kernel_func_name = @compute_kernel, kernel_module_name = @device_module, origin = "acc.parallel"}
+ }
+ acc.delete accPtr(%copyin : memref<1024xf32>)
+ return
+}
+// CHECK: %[[W0:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<block_x>}
+// CHECK: %[[W1:.*]] = acc.par_width %{{.*}} {par_dim = #acc.par_dim<thread_x>}
+// CHECK: acc.compute_region stream(%[[STREAM]] : !gpu.async.token) launch(%{{.*}} = %[[W0]], %{{.*}} = %[[W1]]) ins({{.*}}) : (memref<1024xf32>) {
+// CHECK: acc.yield
+// CHECK: } {kernel_func_name = @compute_kernel, kernel_module_name = @device_module, origin = "acc.parallel"}
+
+// -----
+
+// CHECK-LABEL: func @compute_region_with_results
+func.func @compute_region_with_results() -> i32 {
+ %w0 = acc.par_width {par_dim = #acc.par_dim<thread_x>}
+ %0 = acc.compute_region launch(%arg0 = %w0) -> i32 {
+ %c0_i32 = arith.constant 0 : i32
+ acc.yield %c0_i32 : i32
+ } {origin = "acc.parallel"}
+ return %0 : i32
+}
+// CHECK: %[[W:.*]] = acc.par_width {par_dim = #acc.par_dim<thread_x>}
+// CHECK: {{.*}} = acc.compute_region launch(%{{.*}} = %[[W]]) -> i32 {
+// CHECK: acc.yield
+// CHECK: } {origin = "acc.parallel"}
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index c073045691f5..ffbe95b2a6f8 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -58,12 +58,12 @@ gpu.module @test {
gpu.func @vector_transpose(%src: memref<256x128xf32>, %src1: memref<128x256xf32>) kernel attributes
{known_block_size = array<i32: 1, 32, 16>} {
// CHECK: %[[TDESC_LD:.*]] = xegpu.create_nd_tdesc %[[ARG_0]] : memref<256x128xf32> ->
- // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
+ // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], order = [0, 1]>>
// CHECK: %[[TDESC_ST:.*]] = xegpu.create_nd_tdesc %[[ARG_1]] : memref<128x256xf32> ->
// CHECK-SAME: !xegpu.tensor_desc<128x256xf32, #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>>
- // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC_LD]][0, 0] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}> :
- // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>> -> vector<256x128xf32>
+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC_LD]][0, 0] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], order = [0, 1]>}> :
+ // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], order = [0, 1]>> -> vector<256x128xf32>
// CHECK: %[[TRANSPOSED:.*]] = vector.transpose %2, [1, 0]
// CHECK-SAME {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>} : vector<256x128xf32> to vector<128x256xf32>
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout.mlir b/mlir/test/Dialect/XeGPU/propagate-layout.mlir
index 4f2349a89b1e..3253d0004caf 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout.mlir
@@ -278,9 +278,9 @@ func.func @vector_bitcast_i16_to_f16(%arg0: memref<8x16xi16>, %arg1: memref<16x1
// -----
gpu.module @test {
// CHECK-LABEL: func.func @vector_bitcast_i32_to_f16(
-// CHECK: %[[LOAD:.*]] = xegpu.load_nd %{{.*}} <{layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}>
-// CHECK-SAME: !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> -> vector<16x8xi32>
-// CHECK-NEXT: %{{.*}} = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>}
+// CHECK: %[[LOAD:.*]] = xegpu.load_nd %{{.*}} <{layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>}>
+// CHECK-SAME: !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>> -> vector<16x8xi32>
+// CHECK-NEXT: %{{.*}} = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2], order = [0, 1]>}
// CHECK-SAME: vector<16x8xi32> to vector<16x16xf16>
func.func @vector_bitcast_i32_to_f16(%arg0: memref<8x16xf16>, %arg1: memref<16x8xi32>, %arg2: memref<8x16xf32>) {
%c0 = arith.constant 0 : index
diff --git a/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir b/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir
index 5ed2148a7258..d5f2a2358f1d 100644
--- a/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir
+++ b/mlir/test/Integration/Dialect/XeVM/GPU/gpu_printf.mlir
@@ -9,6 +9,11 @@
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
+// The SPIR-V backend generates incorrect printf ops after
+// https://github.com/llvm/llvm-project/pull/178980 changed the way variadic
+// arguments are handled. This test is expected to fail until the issue is resolved.
+
+// XFAIL: *
module @test attributes {gpu.container_module} {
gpu.module @test_module {
gpu.func @test_printf(%arg0: i32, %arg1: f32) kernel {
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index 3930575c45b3..8e02c06a0a29 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -895,7 +895,7 @@ func.func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
// CHECK-LABEL: func @index_cast
// CHECK-SAME: %[[ARG_0:arg[0-9]+]]: i16
func.func @index_cast(%arg0: i16) -> (i16) {
- %11 = arith.index_cast %arg0 exact : i16 to index
+ %11 = arith.index_cast %arg0 : i16 to index
%12 = arith.index_cast %11 : index to i16
// CHECK: return %[[ARG_0]] : i16
return %12 : i16
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 87d1d88e971b..31a229d94a18 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -3916,6 +3916,17 @@ libc_support_library(
)
libc_support_library(
+ name = "__support_math_ffmaf128",
+ hdrs = ["src/__support/math/ffmaf128.h"],
+ deps = [
+ ":__support_common",
+ ":__support_fputil_fma",
+ ":__support_macros_config",
+ ":llvm_libc_types_float128",
+ ],
+)
+
+libc_support_library(
name = "__support_math_floor",
hdrs = ["src/__support/math/floor.h"],
deps = [
@@ -6694,7 +6705,7 @@ libc_math_function(
libc_math_function(
name = "ffmaf128",
additional_deps = [
- ":__support_fputil_fma",
+ ":__support_math_ffmaf128",
],
)
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 9e20ce1cb3ee..a8455a61c95b 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -10366,6 +10366,7 @@ cc_library(
":BytecodeOpInterface",
":ControlFlowInterfaces",
":DataLayoutInterfaces",
+ ":DialectUtils",
":GPUDialect",
":IR",
":LLVMDialect",