-rw-r--r--  bolt/unittests/Profile/PerfSpeEvents.cpp | 88
-rw-r--r--  clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp | 9
-rw-r--r--  clang/docs/LibASTMatchersReference.html | 14
-rw-r--r--  clang/include/clang/ASTMatchers/ASTMatchers.h | 15
-rw-r--r--  clang/include/clang/Basic/Attr.td | 4
-rw-r--r--  clang/include/clang/Basic/DiagnosticSemaKinds.td | 1
-rw-r--r--  clang/include/clang/CIR/Dialect/IR/CIROps.td | 51
-rw-r--r--  clang/include/clang/CIR/MissingFeatures.h | 1
-rw-r--r--  clang/include/clang/Sema/SemaHLSL.h | 8
-rw-r--r--  clang/lib/AST/ByteCode/Compiler.cpp | 2
-rw-r--r--  clang/lib/AST/ExprConstant.cpp | 11
-rw-r--r--  clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 2
-rw-r--r--  clang/lib/ASTMatchers/Dynamic/Registry.cpp | 1
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 52
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h | 22
-rw-r--r--  clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 23
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.cpp | 76
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.h | 16
-rw-r--r--  clang/lib/Sema/SemaHLSL.cpp | 48
-rw-r--r--  clang/test/CIR/CodeGen/object-size-flex-array.c | 317
-rw-r--r--  clang/test/CIR/CodeGen/object-size.c | 877
-rw-r--r--  clang/test/CIR/CodeGen/object-size.cpp | 108
-rw-r--r--  clang/test/CIR/IR/objsize.cir | 89
-rw-r--r--  clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl | 1
-rw-r--r--  clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl | 36
-rw-r--r--  clang/test/CodeGenHLSL/semantics/semantic.array.hlsl | 37
-rw-r--r--  clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl | 77
-rw-r--r--  clang/test/ParserHLSL/semantic_parsing.hlsl | 21
-rw-r--r--  clang/test/SemaCXX/dependent-switch-case.cpp | 6
-rw-r--r--  clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl | 17
-rw-r--r--  clang/test/SemaHLSL/Semantics/semantics-valid.hlsl | 33
-rw-r--r--  clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp | 14
-rw-r--r--  clang/unittests/Support/TimeProfilerTest.cpp | 3
-rw-r--r--  compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c | 6
-rw-r--r--  libc/src/__support/CPP/type_traits/is_destructible.h | 1
-rw-r--r--  libc/startup/baremetal/arm/start.cpp | 26
-rw-r--r--  llvm/docs/SPIRVUsage.rst | 2
-rw-r--r--  llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h | 4
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsDirectX.td | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 62
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 151
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 11
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 41
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 7
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 11
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 7
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 65
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanValue.h | 6
-rw-r--r--  llvm/test/CodeGen/AArch64/popcount_vmask.ll | 315
-rw-r--r--  llvm/test/CodeGen/AArch64/vector-minmax.ll | 119
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir | 24
-rw-r--r--  llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll | 36
-rw-r--r--  llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll | 98
-rw-r--r--  llvm/test/CodeGen/X86/pr166744.ll | 66
-rw-r--r--  llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll | 11
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll | 17
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected | 45
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll | 13
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected | 16
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test | 9
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test | 8
-rw-r--r--  llvm/utils/UpdateTestChecks/common.py | 1
-rw-r--r--  llvm/utils/UpdateTestChecks/mir.py | 11
-rwxr-xr-x  llvm/utils/update_llc_test_checks.py | 94
-rw-r--r--  mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td | 7
-rw-r--r--  mlir/include/mlir/Dialect/Linalg/Utils/Utils.h | 24
-rw-r--r--  mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td | 10
-rw-r--r--  mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td | 5
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt | 1
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt | 6
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h | 28
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td | 81
-rw-r--r--  mlir/include/mlir/Support/Timing.h | 5
-rw-r--r--  mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 5
-rw-r--r--  mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 219
-rw-r--r--  mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 15
-rw-r--r--  mlir/lib/Dialect/XeGPU/CMakeLists.txt | 1
-rw-r--r--  mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt | 17
-rw-r--r--  mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp | 225
-rw-r--r--  mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 42
-rw-r--r--  mlir/lib/RegisterAllExtensions.cpp | 2
-rw-r--r--  mlir/lib/Support/Timing.cpp | 18
-rw-r--r--  mlir/lib/Transforms/RemoveDeadValues.cpp | 46
-rw-r--r--  mlir/python/CMakeLists.txt | 9
-rw-r--r--  mlir/python/mlir/dialects/XeGPUTransformOps.td | 19
-rw-r--r--  mlir/python/mlir/dialects/transform/xegpu.py | 66
-rw-r--r--  mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir | 2
-rw-r--r--  mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir | 93
-rw-r--r--  mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir | 15
-rw-r--r--  mlir/test/Dialect/XeGPU/transform-ops.mlir | 58
-rw-r--r--  mlir/test/Transforms/remove-dead-values.mlir | 15
-rw-r--r--  mlir/test/python/dialects/transform_xegpu_ext.py | 51
-rw-r--r--  utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 25
-rw-r--r--  utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel | 2
106 files changed, 4151 insertions(+), 408 deletions(-)
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 8d023cd..4f060cd 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -161,4 +161,92 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
parseAndCheckBrstackEvents(1234, ExpectedSamples);
}
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstackAndPbt) {
+  // Check perf input with SPE branch events in brstack format, combined
+  // with the previous branch target address (PBT).
+ // Example collection command:
+ // ```
+ // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+ // ```
+ // How Bolt extracts the branch events:
+ // ```
+ // perf script -F pid,brstack --itrace=bl
+ // ```
+
+ opts::ArmSPE = true;
+ opts::ReadPerfEvents =
+ // "<PID> <SRC>/<DEST>/PN/-/-/10/COND/- <NULL>/<PBT>/-/-/-/0//-\n"
+ " 4567 0xa002/0xa003/PN/-/-/10/COND/- 0x0/0xa001/-/-/-/0//-\n"
+ " 4567 0xb002/0xb003/P/-/-/4/RET/- 0x0/0xb001/-/-/-/0//-\n"
+ " 4567 0xc456/0xc789/P/-/-/13/-/- 0x0/0xc123/-/-/-/0//-\n"
+ " 4567 0xd456/0xd789/M/-/-/7/RET/- 0x0/0xd123/-/-/-/0//-\n"
+ " 4567 0xe005/0xe009/P/-/-/14/RET/- 0x0/0xe001/-/-/-/0//-\n"
+ " 4567 0xd456/0xd789/M/-/-/7/RET/- 0x0/0xd123/-/-/-/0//-\n"
+ " 4567 0xf002/0xf003/MN/-/-/8/COND/- 0x0/0xf001/-/-/-/0//-\n"
+ " 4567 0xc456/0xc789/P/-/-/13/-/- 0x0/0xc123/-/-/-/0//-\n";
+
+  // ExpectedSamples contains the aggregated information about
+  // a branch {{From, To, TraceTo}, {TakenCount, MispredCount}},
+  // where:
+  // - From is the source address of the sampled branch operation.
+  // - To is the target address of the sampled branch operation.
+  // - TraceTo is either
+  //   - 'Type = Trace::BR_ONLY', which means the trace only contains branch
+  //     data,
+  //   - or an address, when the trace contains information about the previous
+  //     branch.
+ //
+ // When FEAT_SPE_PBT is present, Arm SPE emits two records per sample:
+ // - the current branch (Spe.From/Spe.To), and
+ // - the previous taken branch target (PBT) (PBT.From, PBT.To).
+ //
+ // Together they behave like a depth-1 branch stack where:
+ // - the PBT entry is always taken
+ // - the current branch entry may represent a taken branch or a fall-through
+ // - the destination (Spe.To) is the architecturally executed target
+ //
+  // Fall-throughs may be inferred between the PBT entry and the current
+  // branch source (Spe.From), but never between the current branch's own
+  // source and target (Spe.From/Spe.To).
+ //
+  // PBT records only the target address (PBT.To), meaning we have no
+  // information about the branch source (PBT.From = 0x0), the branch type,
+  // or the prediction bit.
+ //
+ // Consider the trace pair:
+ // {{Spe.From, Spe.To, Type}, {TK, MP}},
+ // {{PBT.From, PBT.To, TraceTo}, {TK, MP}}
+ // {{0xd456, 0xd789, Trace::BR_ONLY}, {2, 2}}, {{0x0, 0xd123, 0xd456}, {2, 0}}
+ //
+ // The first entry is the Spe record, which represents a trace from 0xd456
+ // (Spe.From) to 0xd789 (Spe.To). Type = Trace::BR_ONLY, as Bolt processes the
+ // current branch event first. At this point we have no information about the
+ // previous trace (PBT). This entry has a TakenCount = 2, as we have two
+  // samples for (0xd456, 0xd789) in our input. It also has MispredsCount = 2,
+  // as the 'M' misprediction flag appears in both cases.
+ //
+ // The second entry is the PBT record. TakenCount = 2 because the
+ // (PBT.From = 0x0, PBT.To = 0xd123) branch target appears twice in the input,
+ // and MispredsCount = 0 because prediction data is absent. There is no branch
+  // source information, so the PBT.From field is zero (0x0). TraceTo = 0xd456
+  // connects the flow from the previous taken branch at 0xd123 (PBT.To) to the
+  // current branch source at 0xd456 (Spe.From), which then continues to 0xd789
+  // (Spe.To).
+ std::vector<std::pair<Trace, TakenBranchInfo>> ExpectedSamples = {
+ {{0xa002, 0xa003, Trace::BR_ONLY}, {1, 0}},
+ {{0x0, 0xa001, 0xa002}, {1, 0}},
+ {{0xb002, 0xb003, Trace::BR_ONLY}, {1, 0}},
+ {{0x0, 0xb001, 0xb002}, {1, 0}},
+ {{0xc456, 0xc789, Trace::BR_ONLY}, {2, 0}},
+ {{0x0, 0xc123, 0xc456}, {2, 0}},
+ {{0xd456, 0xd789, Trace::BR_ONLY}, {2, 2}},
+ {{0x0, 0xd123, 0xd456}, {2, 0}},
+ {{0xe005, 0xe009, Trace::BR_ONLY}, {1, 0}},
+ {{0x0, 0xe001, 0xe005}, {1, 0}},
+ {{0xf002, 0xf003, Trace::BR_ONLY}, {1, 1}},
+ {{0x0, 0xf001, 0xf002}, {1, 0}}};
+
+ parseAndCheckBrstackEvents(4567, ExpectedSamples);
+}
+
#endif
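
To make the aggregation the test comment describes concrete, here is a minimal, self-contained sketch of how one brstack+PBT sample could fold into the {TakenCount, MispredCount} pairs the test expects. The types are simplified stand-ins for illustration, not BOLT's actual `Trace`/`TakenBranchInfo`:

```cpp
#include <cstdint>
#include <map>
#include <tuple>

// Simplified stand-in (hypothetical) for BOLT's TakenBranchInfo.
struct BranchInfo {
  uint64_t TakenCount = 0;
  uint64_t MispredCount = 0;
};
// Key: (From, To, TraceTo); TraceTo == 0 plays the role of Trace::BR_ONLY.
using SampleMap =
    std::map<std::tuple<uint64_t, uint64_t, uint64_t>, BranchInfo>;

// One SPE sample contributes two entries: the current branch and the
// previous-taken-branch-target (PBT) record that links back to it.
void aggregateSample(SampleMap &Samples, uint64_t SpeFrom, uint64_t SpeTo,
                     uint64_t PbtTo, bool Mispredicted) {
  // Current branch: processed first, so no PBT info is attached (BR_ONLY).
  BranchInfo &Cur = Samples[{SpeFrom, SpeTo, 0}];
  ++Cur.TakenCount;
  Cur.MispredCount += Mispredicted;

  // PBT record: source unknown (0x0), always taken, no prediction data.
  // TraceTo = SpeFrom connects the previous taken target to the current
  // branch source, exactly as described in the test comment above.
  BranchInfo &Pbt = Samples[{0x0, PbtTo, SpeFrom}];
  ++Pbt.TakenCount;
}
```

Feeding the two `0xd456/0xd789` samples from the test input through this sketch yields the `{2, 2}` and `{2, 0}` pairs the comment walks through.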
diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp
index a89a896..e7d97b2 100644
--- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp
+++ b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp
@@ -13,17 +13,10 @@ using namespace clang::ast_matchers;
namespace clang::tidy::hicpp {
-namespace {
-AST_MATCHER(VarDecl, isAsm) { return Node.hasAttr<clang::AsmLabelAttr>(); }
-const ast_matchers::internal::VariadicDynCastAllOfMatcher<Decl,
- FileScopeAsmDecl>
- fileScopeAsmDecl; // NOLINT(readability-identifier-*) preserve clang style
-} // namespace
-
void NoAssemblerCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(asmStmt().bind("asm-stmt"), this);
Finder->addMatcher(fileScopeAsmDecl().bind("asm-file-scope"), this);
- Finder->addMatcher(varDecl(isAsm()).bind("asm-var"), this);
+ Finder->addMatcher(varDecl(hasAttr(attr::AsmLabel)).bind("asm-var"), this);
}
void NoAssemblerCheck::check(const MatchFinder::MatchResult &Result) {
diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html
index 5b2a96d..ac1abb4 100644
--- a/clang/docs/LibASTMatchersReference.html
+++ b/clang/docs/LibASTMatchersReference.html
@@ -825,6 +825,20 @@ fieldDecl()
</pre></td></tr>
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;</td><td class="name" onclick="toggle('fileScopeAsmDecl0')"><a name="fileScopeAsmDecl0Anchor">fileScopeAsmDecl</a></td><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1FileScopeAsmDecl.html">FileScopeAsmDecl</a>&gt;...</td></tr>
+<tr><td colspan="4" class="doc" id="fileScopeAsmDecl0"><pre>Matches top level asm declarations.
+
+Given
+ __asm("nop");
+ void f() {
+ __asm("mov al, 2");
+ }
+fileScopeAsmDecl()
+ matches '__asm("nop")',
+ but not '__asm("mov al, 2")'.
+</pre></td></tr>
+
+
<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;</td><td class="name" onclick="toggle('friendDecl0')"><a name="friendDecl0Anchor">friendDecl</a></td><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1FriendDecl.html">FriendDecl</a>&gt;...</td></tr>
<tr><td colspan="4" class="doc" id="friendDecl0"><pre>Matches friend declarations.
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 98e62de..bca2d84 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -2478,6 +2478,21 @@ extern const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt;
/// matches '__asm("mov al, 2")'
extern const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt;
+/// Matches top level asm declarations.
+///
+/// Given
+/// \code
+/// __asm("nop");
+/// void f() {
+/// __asm("mov al, 2");
+/// }
+/// \endcode
+/// fileScopeAsmDecl()
+/// matches '__asm("nop")',
+/// but not '__asm("mov al, 2")'.
+extern const internal::VariadicDynCastAllOfMatcher<Decl, FileScopeAsmDecl>
+ fileScopeAsmDecl;
+
/// Matches bool literals.
///
/// Example matches true
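
For context, a short hedged sketch of how the new matcher could be driven from a MatchFinder; the callback class here is made up for illustration, and only `fileScopeAsmDecl` itself comes from this patch:

```cpp
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"

using namespace clang;
using namespace clang::ast_matchers;

// Hypothetical callback that reports every top-level asm declaration.
class FileScopeAsmReporter : public MatchFinder::MatchCallback {
public:
  void run(const MatchFinder::MatchResult &Result) override {
    if (const auto *FS = Result.Nodes.getNodeAs<FileScopeAsmDecl>("asm"))
      FS->dumpColor(); // e.g. `__asm("nop");` at file scope
  }
};

void addFileScopeAsmMatcher(MatchFinder &Finder,
                            FileScopeAsmReporter &Reporter) {
  // Matches `__asm("nop");` at file scope, but not asm inside functions.
  Finder.addMatcher(fileScopeAsmDecl().bind("asm"), &Reporter);
}
```

This is the same pattern the NoAssemblerCheck change above now relies on, with the matcher coming from ASTMatchers instead of a local definition.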
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 749f531..1013bfc 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5017,6 +5017,10 @@ def HLSLUnparsedSemantic : HLSLAnnotationAttr {
let Documentation = [InternalOnly];
}
+def HLSLUserSemantic : HLSLSemanticAttr</* Indexable= */ 1> {
+ let Documentation = [InternalOnly];
+}
+
def HLSLSV_Position : HLSLSemanticAttr</* Indexable= */ 1> {
let Documentation = [HLSLSV_PositionDocs];
}
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index fa50953..f43707e 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13184,6 +13184,7 @@ def err_hlsl_semantic_indexing_not_supported
: Error<"semantic %0 does not allow indexing">;
def err_hlsl_init_priority_unsupported : Error<
"initializer priorities are not supported in HLSL">;
+def err_hlsl_semantic_index_overlap : Error<"semantic index overlap %0">;
def warn_hlsl_user_defined_type_missing_member: Warning<"binding type '%select{t|u|b|s|c}0' only applies to types containing %select{SRV resources|UAV resources|constant buffer resources|sampler state|numeric types}0">, InGroup<LegacyConstantRegisterBinding>;
def err_hlsl_binding_type_mismatch: Error<"binding type '%select{t|u|b|s|c}0' only applies to %select{SRV resources|UAV resources|constant buffer resources|sampler state|numeric variables in the global scope}0">;
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 6f9a69e..1625851 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4090,6 +4090,57 @@ def CIR_PrefetchOp : CIR_Op<"prefetch"> {
}
//===----------------------------------------------------------------------===//
+// ObjSizeOp
+//===----------------------------------------------------------------------===//
+
+def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
+ let summary = "Implements the llvm.objsize builtin";
+ let description = [{
+    The `cir.objsize` operation provides information that lets the optimizer
+    determine whether a) an operation (like memcpy) will overflow a buffer
+    that corresponds to an object, or b) a runtime check for overflow is
+    unnecessary. An object in this context means an allocation of a specific
+    class, structure, array, or other object.
+
+ When the `min` attribute is present, the operation returns the minimum
+ guaranteed accessible size. When absent (max mode), it returns the maximum
+ possible object size. Corresponds to `llvm.objectsize`'s `min` argument.
+
+ The `dynamic` attribute determines if the value should be evaluated at
+ runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
+
+ The `nullunknown` attribute controls how null pointers are handled. When
+ present, null pointers are treated as having unknown size. When absent,
+ null pointers are treated as having 0 size (in min mode) or -1 size
+ (in max mode). Corresponds to `llvm.objectsize`'s `nullunknown` argument.
+
+ Example:
+
+ ```mlir
+ %size = cir.objsize min %ptr : !cir.ptr<i32> -> i64
+ %dsize = cir.objsize max dynamic %ptr : !cir.ptr<i32> -> i64
+ %nsize = cir.objsize min nullunknown %ptr : !cir.ptr<i32> -> i64
+ ```
+ }];
+
+ let arguments = (ins
+ CIR_PointerType:$ptr,
+ UnitAttr:$min,
+ UnitAttr:$nullunknown,
+ UnitAttr:$dynamic
+ );
+
+ let results = (outs CIR_AnyFundamentalIntType:$result);
+
+ let assemblyFormat = [{
+ (`min` $min^) : (`max`)?
+ (`nullunknown` $nullunknown^)?
+ (`dynamic` $dynamic^)?
+ $ptr `:` qualified(type($ptr)) `->` qualified(type($result)) attr-dict
+ }];
+}
+
+//===----------------------------------------------------------------------===//
// PtrDiffOp
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 6f099a7..af1ffff 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -215,6 +215,7 @@ struct MissingFeatures {
static bool builtinCallMathErrno() { return false; }
static bool builtinCheckKind() { return false; }
static bool cgCapturedStmtInfo() { return false; }
+ static bool countedBySize() { return false; }
static bool cgFPOptionsRAII() { return false; }
static bool checkBitfieldClipping() { return false; }
static bool cirgenABIInfo() { return false; }
diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index 8c3b6ae..28b03ac 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -20,7 +20,9 @@
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Sema/SemaBase.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/TargetParser/Triple.h"
#include <initializer_list>
@@ -259,9 +261,11 @@ private:
HLSLSemanticAttr *createSemantic(const SemanticInfo &Semantic,
DeclaratorDecl *TargetDecl);
bool determineActiveSemanticOnScalar(FunctionDecl *FD, DeclaratorDecl *D,
- SemanticInfo &ActiveSemantic);
+ SemanticInfo &ActiveSemantic,
+ llvm::StringSet<> &ActiveInputSemantics);
bool determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *D,
- SemanticInfo &ActiveSemantic);
+ SemanticInfo &ActiveSemantic,
+ llvm::StringSet<> &ActiveInputSemantics);
void processExplicitBindingsOnDecl(VarDecl *D);
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 4e63400..84f7e62 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -6007,6 +6007,8 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
CaseLabels[SC] = this->getLabel();
const Expr *Value = CS->getLHS();
+ if (Value->isValueDependent())
+ return false;
PrimType ValueT = this->classifyPrim(Value->getType());
// Compare the case statement's value to the switch condition.
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8fab6ef..193f87c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -5452,10 +5452,13 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info,
}
const CaseStmt *CS = cast<CaseStmt>(SC);
- APSInt LHS = CS->getLHS()->EvaluateKnownConstInt(Info.Ctx);
- APSInt RHS = CS->getRHS() ? CS->getRHS()->EvaluateKnownConstInt(Info.Ctx)
- : LHS;
- if (LHS <= Value && Value <= RHS) {
+ const Expr *LHS = CS->getLHS();
+ const Expr *RHS = CS->getRHS();
+ if (LHS->isValueDependent() || (RHS && RHS->isValueDependent()))
+ return ESR_Failed;
+ APSInt LHSValue = LHS->EvaluateKnownConstInt(Info.Ctx);
+ APSInt RHSValue = RHS ? RHS->EvaluateKnownConstInt(Info.Ctx) : LHSValue;
+ if (LHSValue <= Value && Value <= RHSValue) {
Found = SC;
break;
}
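
Both guards above address the same shape of input: the constant evaluator walking a switch whose case label is still value-dependent, which previously reached `EvaluateKnownConstInt` and asserted. A hypothetical reduced form of that shape (the committed regression test is clang/test/SemaCXX/dependent-switch-case.cpp):

```cpp
// General shape only: the case operand stays value-dependent until the
// template is instantiated, so constant evaluation must bail out cleanly
// instead of asserting.
template <int N> int pick(int X) {
  switch (X) {
  case N: // value-dependent label
    return 1;
  }
  return 0;
}
```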
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 42f124b..0874b3d 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -954,6 +954,8 @@ const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTryStmt> cxxTryStmt;
const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThrowExpr> cxxThrowExpr;
const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt;
const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt;
+const internal::VariadicDynCastAllOfMatcher<Decl, FileScopeAsmDecl>
+ fileScopeAsmDecl;
const internal::VariadicDynCastAllOfMatcher<Stmt, CXXBoolLiteralExpr>
cxxBoolLiteral;
const internal::VariadicDynCastAllOfMatcher<Stmt, StringLiteral> stringLiteral;
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 01c03f3..66848f7 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -246,6 +246,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(expr);
REGISTER_MATCHER(exprWithCleanups);
REGISTER_MATCHER(fieldDecl);
+ REGISTER_MATCHER(fileScopeAsmDecl);
REGISTER_MATCHER(fixedPointLiteral);
REGISTER_MATCHER(floatLiteral);
REGISTER_MATCHER(forCallable);
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 0803910..4e6a5ee 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -481,6 +481,19 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e,
returnValue);
}
+ case Builtin::BI__builtin_dynamic_object_size:
+ case Builtin::BI__builtin_object_size: {
+ unsigned type =
+ e->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
+ auto resType = mlir::cast<cir::IntType>(convertType(e->getType()));
+
+ // We pass this builtin onto the optimizer so that it can figure out the
+ // object size in more complex cases.
+ bool isDynamic = builtinID == Builtin::BI__builtin_dynamic_object_size;
+ return RValue::get(emitBuiltinObjectSize(e->getArg(0), type, resType,
+ /*EmittedE=*/nullptr, isDynamic));
+ }
+
case Builtin::BI__builtin_prefetch: {
auto evaluateOperandAsInt = [&](const Expr *arg) {
Expr::EvalResult res;
@@ -663,3 +676,42 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) {
mlir::Value vaList = emitVAListRef(ve->getSubExpr()).getPointer();
return cir::VAArgOp::create(builder, loc, type, vaList);
}
+
+mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type,
+ cir::IntType resType,
+ mlir::Value emittedE,
+ bool isDynamic) {
+ assert(!cir::MissingFeatures::opCallImplicitObjectSizeArgs());
+
+  // LLVM can't handle type=3 appropriately, and __builtin_object_size shouldn't
+  // evaluate e for side effects. In either case, just like the original LLVM
+  // lowering, we emit a constant instead of lowering to `cir.objsize`.
+ if (type == 3 || (!emittedE && e->HasSideEffects(getContext())))
+ return builder.getConstInt(getLoc(e->getSourceRange()), resType,
+ (type & 2) ? 0 : -1);
+
+ mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e);
+ assert(mlir::isa<cir::PointerType>(ptr.getType()) &&
+ "Non-pointer passed to __builtin_object_size?");
+
+ assert(!cir::MissingFeatures::countedBySize());
+
+ // Extract the min/max mode from type. CIR only supports type 0
+ // (max, whole object) and type 2 (min, whole object), not type 1 or 3
+ // (closest subobject variants).
+ const bool min = ((type & 2) != 0);
+ // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
+ auto op =
+ cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()), resType, ptr,
+ min, /*nullUnknown=*/true, isDynamic);
+ return op.getResult();
+}
+
+mlir::Value CIRGenFunction::evaluateOrEmitBuiltinObjectSize(
+ const Expr *e, unsigned type, cir::IntType resType, mlir::Value emittedE,
+ bool isDynamic) {
+ uint64_t objectSize;
+ if (!e->tryEvaluateObjectSize(objectSize, getContext(), type))
+ return emitBuiltinObjectSize(e, type, resType, emittedE, isDynamic);
+ return builder.getConstInt(getLoc(e->getSourceRange()), resType, objectSize);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 1c52a78..f879e58 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1307,6 +1307,28 @@ public:
RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID,
const clang::CallExpr *e, ReturnValueSlot returnValue);
+ /// Returns a Value corresponding to the size of the given expression by
+ /// emitting a `cir.objsize` operation.
+ ///
+ /// \param e The expression whose object size to compute
+ /// \param type Determines the semantics of the object size computation.
+ /// The type parameter is a 2-bit value where:
+  ///          bit 0 (type & 1): clear = whole object, set = closest subobject
+  ///          bit 1 (type & 2): clear = maximum size, set = minimum size
+ /// \param resType The result type for the size value
+ /// \param emittedE Optional pre-emitted pointer value. If non-null, we'll
+ /// call `cir.objsize` on this value rather than emitting e.
+ /// \param isDynamic If true, allows runtime evaluation via dynamic mode
+ mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type,
+ cir::IntType resType, mlir::Value emittedE,
+ bool isDynamic);
+
+ mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e,
+ unsigned type,
+ cir::IntType resType,
+ mlir::Value emittedE,
+ bool isDynamic);
+
RValue emitCall(const CIRGenFunctionInfo &funcInfo,
const CIRGenCallee &callee, ReturnValueSlot returnValue,
const CallArgList &args, cir::CIRCallOpInterface *callOp,
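
A minimal sketch of the 2-bit decoding the doc comment above describes; this mirrors, but is not, the in-tree logic, and the names are illustrative:

```cpp
// Decode __builtin_object_size's 'type' argument (0..3) into its two
// independent flags, matching the doc comment above.
struct ObjSizeMode {
  bool ClosestSubobject; // bit 0: set = closest subobject, clear = whole object
  bool Minimum;          // bit 1: set = minimum size, clear = maximum size
};

constexpr ObjSizeMode decodeObjSizeType(unsigned Type) {
  return {(Type & 1) != 0, (Type & 2) != 0};
}

static_assert(decodeObjSizeType(2).Minimum, "type 2 selects min mode");
static_assert(!decodeObjSizeType(0).ClosestSubobject, "type 0 is whole-object");
```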
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ba967a4..b4afed7 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2832,6 +2832,29 @@ static void collectUnreachable(mlir::Operation *parent,
}
}
+mlir::LogicalResult CIRToLLVMObjSizeOpLowering::matchAndRewrite(
+ cir::ObjSizeOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Type llvmResTy = getTypeConverter()->convertType(op.getType());
+ mlir::Location loc = op->getLoc();
+
+ mlir::IntegerType i1Ty = rewriter.getI1Type();
+
+ auto i1Val = [&rewriter, &loc, &i1Ty](bool val) {
+ return mlir::LLVM::ConstantOp::create(rewriter, loc, i1Ty, val);
+ };
+
+ replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.objectsize", llvmResTy,
+ {
+ adaptor.getPtr(),
+ i1Val(op.getMin()),
+ i1Val(op.getNullunknown()),
+ i1Val(op.getDynamic()),
+ });
+
+ return mlir::LogicalResult::success();
+}
+
void ConvertCIRToLLVMPass::processCIRAttrs(mlir::ModuleOp module) {
// Lower the module attributes to LLVM equivalents.
if (mlir::Attribute tripleAttr =
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 945f9e2..e392a12 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -549,6 +549,16 @@ static void addSPIRVBuiltinDecoration(llvm::GlobalVariable *GV,
GV->addMetadata("spirv.Decorations", *Decoration);
}
+static void addLocationDecoration(llvm::GlobalVariable *GV, unsigned Location) {
+ LLVMContext &Ctx = GV->getContext();
+ IRBuilder<> B(GV->getContext());
+ MDNode *Operands =
+ MDNode::get(Ctx, {ConstantAsMetadata::get(B.getInt32(/* Location */ 30)),
+ ConstantAsMetadata::get(B.getInt32(Location))});
+ MDNode *Decoration = MDNode::get(Ctx, {Operands});
+ GV->addMetadata("spirv.Decorations", *Decoration);
+}
+
static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M,
llvm::Type *Ty, const Twine &Name,
unsigned BuiltInID) {
@@ -562,6 +572,69 @@ static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M,
return B.CreateLoad(Ty, GV);
}
+static llvm::Value *createSPIRVLocationLoad(IRBuilder<> &B, llvm::Module &M,
+ llvm::Type *Ty, unsigned Location,
+ StringRef Name) {
+ auto *GV = new llvm::GlobalVariable(
+ M, Ty, /* isConstant= */ true, llvm::GlobalValue::ExternalLinkage,
+ /* Initializer= */ nullptr, /* Name= */ Name, /* insertBefore= */ nullptr,
+ llvm::GlobalVariable::GeneralDynamicTLSModel,
+ /* AddressSpace */ 7, /* isExternallyInitialized= */ true);
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ addLocationDecoration(GV, Location);
+ return B.CreateLoad(Ty, GV);
+}
+
+llvm::Value *
+CGHLSLRuntime::emitSPIRVUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
+ HLSLSemanticAttr *Semantic,
+ std::optional<unsigned> Index) {
+ Twine BaseName = Twine(Semantic->getAttrName()->getName());
+ Twine VariableName = BaseName.concat(Twine(Index.value_or(0)));
+
+  // DXC completely ignores the semantic/index pair. Locations are assigned
+  // from the first semantic to the last.
+ // DXC completely ignores the semantic/index pair. Location are assigned from
+ // the first semantic to the last.
+ llvm::ArrayType *AT = dyn_cast<llvm::ArrayType>(Type);
+ unsigned ElementCount = AT ? AT->getNumElements() : 1;
+ SPIRVLastAssignedInputSemanticLocation += ElementCount;
+ return createSPIRVLocationLoad(B, CGM.getModule(), Type, Location,
+ VariableName.str());
+}
+
+llvm::Value *
+CGHLSLRuntime::emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
+ HLSLSemanticAttr *Semantic,
+ std::optional<unsigned> Index) {
+ Twine BaseName = Twine(Semantic->getAttrName()->getName());
+ Twine VariableName = BaseName.concat(Twine(Index.value_or(0)));
+
+  // DXIL packing rules, etc., shall be handled here.
+ // FIXME: generate proper sigpoint, index, col, row values.
+ // FIXME: also DXIL loads vectors element by element.
+ SmallVector<Value *> Args{B.getInt32(4), B.getInt32(0), B.getInt32(0),
+ B.getInt8(0),
+ llvm::PoisonValue::get(B.getInt32Ty())};
+
+ llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_load_input;
+ llvm::Value *Value = B.CreateIntrinsic(/*ReturnType=*/Type, IntrinsicID, Args,
+ nullptr, VariableName);
+ return Value;
+}
+
+llvm::Value *CGHLSLRuntime::emitUserSemanticLoad(
+ IRBuilder<> &B, llvm::Type *Type, const clang::DeclaratorDecl *Decl,
+ HLSLSemanticAttr *Semantic, std::optional<unsigned> Index) {
+ if (CGM.getTarget().getTriple().isSPIRV())
+ return emitSPIRVUserSemanticLoad(B, Type, Semantic, Index);
+
+ if (CGM.getTarget().getTriple().isDXIL())
+ return emitDXILUserSemanticLoad(B, Type, Semantic, Index);
+
+ llvm_unreachable("Unsupported target for user-semantic load.");
+}
+
llvm::Value *CGHLSLRuntime::emitSystemSemanticLoad(
IRBuilder<> &B, llvm::Type *Type, const clang::DeclaratorDecl *Decl,
Attr *Semantic, std::optional<unsigned> Index) {
@@ -626,6 +699,9 @@ CGHLSLRuntime::handleScalarSemanticLoad(IRBuilder<> &B, const FunctionDecl *FD,
std::optional<unsigned> Index = std::nullopt;
if (Semantic->isSemanticIndexExplicit())
Index = Semantic->getSemanticIndex();
+
+ if (isa<HLSLUserSemanticAttr>(Semantic))
+ return emitUserSemanticLoad(B, Type, Decl, Semantic, Index);
return emitSystemSemanticLoad(B, Type, Decl, Semantic, Index);
}
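
The SPIR-V path above hands out input locations purely in declaration order, one slot per array element, via the `SPIRVLastAssignedInputSemanticLocation` counter. A hedged sketch of that policy in isolation (illustrative names, not the CGHLSLRuntime API):

```cpp
// Illustrative only: sequential input-location assignment in the spirit of
// SPIRVLastAssignedInputSemanticLocation. The semantic/index pair plays no
// part in the location choice, matching the DXC behavior noted above.
struct InputLocationAllocator {
  unsigned NextLocation = 0;

  // Returns the first location for a semantic spanning ElementCount slots
  // (arrays consume one location per element, scalars one total).
  unsigned allocate(unsigned ElementCount) {
    unsigned First = NextLocation;
    NextLocation += ElementCount;
    return First;
  }
};
```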
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index d35df52..9d31714 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -200,9 +200,25 @@ private:
llvm::GlobalVariable *BufGV);
void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl,
llvm::GlobalVariable *GV);
+ void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl,
+ llvm::GlobalVariable *GV,
+ HLSLResourceBindingAttr *RBA);
+
+ llvm::Value *emitSPIRVUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
+ HLSLSemanticAttr *Semantic,
+ std::optional<unsigned> Index);
+ llvm::Value *emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
+ HLSLSemanticAttr *Semantic,
+ std::optional<unsigned> Index);
+ llvm::Value *emitUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
+ const clang::DeclaratorDecl *Decl,
+ HLSLSemanticAttr *Semantic,
+ std::optional<unsigned> Index);
+
llvm::Triple::ArchType getArch();
llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
+ unsigned SPIRVLastAssignedInputSemanticLocation = 0;
};
} // namespace CodeGen
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index b9707f0..a06c57b 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -775,6 +775,10 @@ HLSLSemanticAttr *SemaHLSL::createSemantic(const SemanticInfo &Info,
DeclaratorDecl *TargetDecl) {
std::string SemanticName = Info.Semantic->getAttrName()->getName().upper();
+  if (isa<HLSLUserSemanticAttr>(Info.Semantic))
+ return createSemanticAttr<HLSLUserSemanticAttr>(*Info.Semantic, TargetDecl,
+ Info.Index);
+
if (SemanticName == "SV_DISPATCHTHREADID") {
return createSemanticAttr<HLSLSV_DispatchThreadIDAttr>(
*Info.Semantic, TargetDecl, Info.Index);
@@ -797,9 +801,10 @@ HLSLSemanticAttr *SemaHLSL::createSemantic(const SemanticInfo &Info,
return nullptr;
}
-bool SemaHLSL::determineActiveSemanticOnScalar(FunctionDecl *FD,
- DeclaratorDecl *D,
- SemanticInfo &ActiveSemantic) {
+bool SemaHLSL::determineActiveSemanticOnScalar(
+ FunctionDecl *FD, DeclaratorDecl *D, SemanticInfo &ActiveSemantic,
+ llvm::StringSet<> &ActiveInputSemantics) {
+
if (ActiveSemantic.Semantic == nullptr) {
ActiveSemantic.Semantic = D->getAttr<HLSLSemanticAttr>();
if (ActiveSemantic.Semantic &&
@@ -818,11 +823,31 @@ bool SemaHLSL::determineActiveSemanticOnScalar(FunctionDecl *FD,
checkSemanticAnnotation(FD, D, A);
FD->addAttr(A);
+
+ unsigned Location = ActiveSemantic.Index.value_or(0);
+
+ const ConstantArrayType *AT = dyn_cast<ConstantArrayType>(D->getType());
+ unsigned ElementCount = AT ? AT->getZExtSize() : 1;
+ ActiveSemantic.Index = Location + ElementCount;
+
+ Twine BaseName = Twine(ActiveSemantic.Semantic->getAttrName()->getName());
+ for (unsigned I = 0; I < ElementCount; ++I) {
+ Twine VariableName = BaseName.concat(Twine(Location + I));
+
+ auto [_, Inserted] = ActiveInputSemantics.insert(VariableName.str());
+ if (!Inserted) {
+ Diag(D->getLocation(), diag::err_hlsl_semantic_index_overlap)
+ << VariableName.str();
+ return false;
+ }
+ }
+
return true;
}
-bool SemaHLSL::determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *D,
- SemanticInfo &ActiveSemantic) {
+bool SemaHLSL::determineActiveSemantic(
+ FunctionDecl *FD, DeclaratorDecl *D, SemanticInfo &ActiveSemantic,
+ llvm::StringSet<> &ActiveInputSemantics) {
if (ActiveSemantic.Semantic == nullptr) {
ActiveSemantic.Semantic = D->getAttr<HLSLSemanticAttr>();
if (ActiveSemantic.Semantic &&
@@ -833,12 +858,13 @@ bool SemaHLSL::determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *D,
const Type *T = D->getType()->getUnqualifiedDesugaredType();
const RecordType *RT = dyn_cast<RecordType>(T);
if (!RT)
- return determineActiveSemanticOnScalar(FD, D, ActiveSemantic);
+ return determineActiveSemanticOnScalar(FD, D, ActiveSemantic,
+ ActiveInputSemantics);
const RecordDecl *RD = RT->getDecl();
for (FieldDecl *Field : RD->fields()) {
SemanticInfo Info = ActiveSemantic;
- if (!determineActiveSemantic(FD, Field, Info)) {
+ if (!determineActiveSemantic(FD, Field, Info, ActiveInputSemantics)) {
Diag(Field->getLocation(), diag::note_hlsl_semantic_used_here) << Field;
return false;
}
@@ -911,12 +937,14 @@ void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) {
llvm_unreachable("Unhandled environment in triple");
}
+ llvm::StringSet<> ActiveInputSemantics;
for (ParmVarDecl *Param : FD->parameters()) {
SemanticInfo ActiveSemantic;
ActiveSemantic.Semantic = nullptr;
ActiveSemantic.Index = std::nullopt;
- if (!determineActiveSemantic(FD, Param, ActiveSemantic)) {
+ if (!determineActiveSemantic(FD, Param, ActiveSemantic,
+ ActiveInputSemantics)) {
Diag(Param->getLocation(), diag::note_previous_decl) << Param;
FD->setInvalidDecl();
}
@@ -947,6 +975,8 @@ void SemaHLSL::checkSemanticAnnotation(FunctionDecl *EntryPoint,
return;
DiagnoseAttrStageMismatch(SemanticAttr, ST, {llvm::Triple::Pixel});
break;
+ case attr::HLSLUserSemantic:
+ return;
default:
llvm_unreachable("Unknown SemanticAttr");
}
@@ -1766,7 +1796,7 @@ void SemaHLSL::handleSemanticAttr(Decl *D, const ParsedAttr &AL) {
if (AL.getAttrName()->getName().starts_with_insensitive("SV_"))
diagnoseSystemSemanticAttr(D, AL, Index);
else
- Diag(AL.getLoc(), diag::err_hlsl_unknown_semantic) << AL;
+ D->addAttr(createSemanticAttr<HLSLUserSemanticAttr>(AL, nullptr, Index));
}
void SemaHLSL::handlePackOffsetAttr(Decl *D, const ParsedAttr &AL) {
diff --git a/clang/test/CIR/CodeGen/object-size-flex-array.c b/clang/test/CIR/CodeGen/object-size-flex-array.c
new file mode 100644
index 0000000..74229fd
--- /dev/null
+++ b/clang/test/CIR/CodeGen/object-size-flex-array.c
@@ -0,0 +1,317 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR --check-prefix=CIR-NO-STRICT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm -disable-llvm-passes %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM --check-prefix=LLVM-NO-STRICT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm -disable-llvm-passes %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG --check-prefix=OGCG-NO-STRICT
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=0 -emit-cir %s -o %t-strict-0.cir
+// RUN: FileCheck --input-file=%t-strict-0.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-0
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=0 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-0.ll
+// RUN: FileCheck --input-file=%t-cir-strict-0.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-0
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=0 -emit-llvm -disable-llvm-passes %s -o %t-strict-0.ll
+// RUN: FileCheck --input-file=%t-strict-0.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-0
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=1 -emit-cir %s -o %t-strict-1.cir
+// RUN: FileCheck --input-file=%t-strict-1.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-1
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=1 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-1.ll
+// RUN: FileCheck --input-file=%t-cir-strict-1.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-1
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=1 -emit-llvm -disable-llvm-passes %s -o %t-strict-1.ll
+// RUN: FileCheck --input-file=%t-strict-1.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-1
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=2 -emit-cir %s -o %t-strict-2.cir
+// RUN: FileCheck --input-file=%t-strict-2.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-2
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=2 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-2.ll
+// RUN: FileCheck --input-file=%t-cir-strict-2.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-2
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=2 -emit-llvm -disable-llvm-passes %s -o %t-strict-2.ll
+// RUN: FileCheck --input-file=%t-strict-2.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-2
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=3 -emit-cir %s -o %t-strict-3.cir
+// RUN: FileCheck --input-file=%t-strict-3.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-3
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=3 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-3.ll
+// RUN: FileCheck --input-file=%t-cir-strict-3.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-3
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=3 -emit-llvm -disable-llvm-passes %s -o %t-strict-3.ll
+// RUN: FileCheck --input-file=%t-strict-3.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-3
+
+#define OBJECT_SIZE_BUILTIN __builtin_object_size
+
+typedef struct {
+ float f;
+ double c[];
+} foo_t;
+
+typedef struct {
+ float f;
+ double c[0];
+} foo0_t;
+
+typedef struct {
+ float f;
+ double c[1];
+} foo1_t;
+
+typedef struct {
+ float f;
+ double c[2];
+} foo2_t;
+
+// CIR-LABEL: @bar
+// LLVM-LABEL: @bar(
+// OGCG-LABEL: @bar(
+unsigned bar(foo_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-2: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-3: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @bar0
+// LLVM-LABEL: @bar0(
+// OGCG-LABEL: @bar0(
+unsigned bar0(foo0_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-2: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-3: cir.const #cir.int<0>
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-3: store i32 0
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-3: ret i32 0
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @bar1
+// LLVM-LABEL: @bar1(
+// OGCG-LABEL: @bar1(
+unsigned bar1(foo1_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-2: cir.const #cir.int<8>
+ // CIR-STRICT-3: cir.const #cir.int<8>
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-2: store i32 8
+ // LLVM-STRICT-3: store i32 8
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-2: ret i32 8
+ // OGCG-STRICT-3: ret i32 8
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @bar2
+// LLVM-LABEL: @bar2(
+// OGCG-LABEL: @bar2(
+unsigned bar2(foo2_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.const #cir.int<16>
+ // CIR-STRICT-2: cir.const #cir.int<16>
+ // CIR-STRICT-3: cir.const #cir.int<16>
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // LLVM-STRICT-1: store i32 16
+ // LLVM-STRICT-2: store i32 16
+ // LLVM-STRICT-3: store i32 16
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false)
+ // OGCG-STRICT-1: ret i32 16
+ // OGCG-STRICT-2: ret i32 16
+ // OGCG-STRICT-3: ret i32 16
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+#define DYNAMIC_OBJECT_SIZE_BUILTIN __builtin_dynamic_object_size
+
+// CIR-LABEL: @dyn_bar
+// LLVM-LABEL: @dyn_bar(
+// OGCG-LABEL: @dyn_bar(
+unsigned dyn_bar(foo_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-2: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-3: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @dyn_bar0
+// LLVM-LABEL: @dyn_bar0(
+// OGCG-LABEL: @dyn_bar0(
+unsigned dyn_bar0(foo0_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-2: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-3: cir.const #cir.int<0>
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-3: store i32 0
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-3: ret i32 0
+ return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @dyn_bar1
+// LLVM-LABEL: @dyn_bar1(
+// OGCG-LABEL: @dyn_bar1(
+unsigned dyn_bar1(foo1_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-2: cir.const #cir.int<8>
+ // CIR-STRICT-3: cir.const #cir.int<8>
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-2: store i32 8
+ // LLVM-STRICT-3: store i32 8
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-2: ret i32 8
+ // OGCG-STRICT-3: ret i32 8
+ return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @dyn_bar2
+// LLVM-LABEL: @dyn_bar2(
+// OGCG-LABEL: @dyn_bar2(
+unsigned dyn_bar2(foo2_t *f) {
+ // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i
+ // CIR-STRICT-1: cir.const #cir.int<16>
+ // CIR-STRICT-2: cir.const #cir.int<16>
+ // CIR-STRICT-3: cir.const #cir.int<16>
+ // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // LLVM-STRICT-1: store i32 16
+ // LLVM-STRICT-2: store i32 16
+ // LLVM-STRICT-3: store i32 16
+ // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true)
+ // OGCG-STRICT-1: ret i32 16
+ // OGCG-STRICT-2: ret i32 16
+ // OGCG-STRICT-3: ret i32 16
+ return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// Also check non-trailing flex-array-like members.
+
+typedef struct {
+ double c[0];
+ float f;
+} foofoo0_t;
+
+typedef struct {
+ double c[1];
+ float f;
+} foofoo1_t;
+
+typedef struct {
+ double c[2];
+ float f;
+} foofoo2_t;
+
+// CIR-LABEL: @babar0
+// LLVM-LABEL: @babar0(
+// OGCG-LABEL: @babar0(
+unsigned babar0(foofoo0_t *f) {
+ // CIR-NO-STRICT: cir.const #cir.int<0>
+ // CIR-STRICT-0: cir.const #cir.int<0>
+ // CIR-STRICT-1: cir.const #cir.int<0>
+ // CIR-STRICT-2: cir.const #cir.int<0>
+ // CIR-STRICT-3: cir.const #cir.int<0>
+ // LLVM-NO-STRICT: store i32 0
+ // LLVM-STRICT-0: store i32 0
+ // LLVM-STRICT-1: store i32 0
+ // LLVM-STRICT-2: store i32 0
+ // LLVM-STRICT-3: store i32 0
+ // OGCG-NO-STRICT: ret i32 0
+ // OGCG-STRICT-0: ret i32 0
+ // OGCG-STRICT-1: ret i32 0
+ // OGCG-STRICT-2: ret i32 0
+ // OGCG-STRICT-3: ret i32 0
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @babar1
+// LLVM-LABEL: @babar1(
+// OGCG-LABEL: @babar1(
+unsigned babar1(foofoo1_t *f) {
+ // CIR-NO-STRICT: cir.const #cir.int<8>
+ // CIR-STRICT-0: cir.const #cir.int<8>
+ // CIR-STRICT-1: cir.const #cir.int<8>
+ // CIR-STRICT-2: cir.const #cir.int<8>
+ // CIR-STRICT-3: cir.const #cir.int<8>
+ // LLVM-NO-STRICT: store i32 8
+ // LLVM-STRICT-0: store i32 8
+ // LLVM-STRICT-1: store i32 8
+ // LLVM-STRICT-2: store i32 8
+ // LLVM-STRICT-3: store i32 8
+ // OGCG-NO-STRICT: ret i32 8
+ // OGCG-STRICT-0: ret i32 8
+ // OGCG-STRICT-1: ret i32 8
+ // OGCG-STRICT-2: ret i32 8
+ // OGCG-STRICT-3: ret i32 8
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
+
+// CIR-LABEL: @babar2
+// LLVM-LABEL: @babar2(
+// OGCG-LABEL: @babar2(
+unsigned babar2(foofoo2_t *f) {
+ // CIR-NO-STRICT: cir.const #cir.int<16>
+ // CIR-STRICT-0: cir.const #cir.int<16>
+ // CIR-STRICT-1: cir.const #cir.int<16>
+ // CIR-STRICT-2: cir.const #cir.int<16>
+ // CIR-STRICT-3: cir.const #cir.int<16>
+ // LLVM-NO-STRICT: store i32 16
+ // LLVM-STRICT-0: store i32 16
+ // LLVM-STRICT-1: store i32 16
+ // LLVM-STRICT-2: store i32 16
+ // LLVM-STRICT-3: store i32 16
+ // OGCG-NO-STRICT: ret i32 16
+ // OGCG-STRICT-0: ret i32 16
+ // OGCG-STRICT-1: ret i32 16
+ // OGCG-STRICT-2: ret i32 16
+ // OGCG-STRICT-3: ret i32 16
+ return OBJECT_SIZE_BUILTIN(f->c, 1);
+}
diff --git a/clang/test/CIR/CodeGen/object-size.c b/clang/test/CIR/CodeGen/object-size.c
new file mode 100644
index 0000000..1b10fb8b
--- /dev/null
+++ b/clang/test/CIR/CodeGen/object-size.c
@@ -0,0 +1,877 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+char gbuf[63];
+char *gp;
+int gi, gj;
+
+// CIR-LABEL: @test1
+// LLVM-LABEL: define {{.*}} void @test1
+// OGCG-LABEL: define {{.*}} void @test1
+void test1(void) {
+ // CIR: cir.const #cir.int<59>
+ // LLVM: store i32 59
+ // OGCG: store i32 59
+ gi = __builtin_object_size(&gbuf[4], 1);
+}
+
+// CIR-LABEL: @test2
+// LLVM-LABEL: define {{.*}} void @test2
+// OGCG-LABEL: define {{.*}} void @test2
+void test2(void) {
+ // CIR: cir.const #cir.int<63>
+ // LLVM: store i32 63
+ // OGCG: store i32 63
+ gi = __builtin_object_size(gbuf, 1);
+}
+
+// CIR-LABEL: @test3
+// LLVM-LABEL: define {{.*}} void @test3
+// OGCG-LABEL: define {{.*}} void @test3
+void test3(void) {
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&gbuf[100], 1);
+}
+
+// CIR-LABEL: @test4
+// LLVM-LABEL: define {{.*}} void @test4
+// OGCG-LABEL: define {{.*}} void @test4
+void test4(void) {
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)(void*)&gbuf[-1], 1);
+}
+
+// CIR-LABEL: @test5
+// LLVM-LABEL: define {{.*}} void @test5
+// OGCG-LABEL: define {{.*}} void @test5
+void test5(void) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(gp, 0);
+}
+
+// CIR-LABEL: @test6
+// LLVM-LABEL: define {{.*}} void @test6
+// OGCG-LABEL: define {{.*}} void @test6
+void test6(void) {
+ char buf[57];
+
+ // CIR: cir.const #cir.int<53>
+ // LLVM: store i32 53
+ // OGCG: store i32 53
+ gi = __builtin_object_size(&buf[4], 1);
+}
+
+// CIR-LABEL: @test18
+// LLVM-LABEL: define {{.*}} i32 @test18
+// OGCG-LABEL: define {{.*}} i32 @test18
+unsigned test18(int cond) {
+ int a[4], b[4];
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64
+ // OGCG: call i64 @llvm.objectsize.i64
+ return __builtin_object_size(cond ? a : b, 0);
+}
+
+// CIR-LABEL: @test19
+// LLVM-LABEL: define {{.*}} void @test19
+// OGCG-LABEL: define {{.*}} void @test19
+void test19(void) {
+ struct {
+ int a, b;
+ } foo;
+
+ // CIR: cir.const #cir.int<8>
+ // LLVM: store i32 8
+ // OGCG: store i32 8
+ gi = __builtin_object_size(&foo.a, 0);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&foo.a, 1);
+
+ // CIR: cir.const #cir.int<8>
+ // LLVM: store i32 8
+ // OGCG: store i32 8
+ gi = __builtin_object_size(&foo.a, 2);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&foo.a, 3);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&foo.b, 0);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&foo.b, 1);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&foo.b, 2);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&foo.b, 3);
+}
+
+// CIR-LABEL: @test20
+// LLVM-LABEL: define {{.*}} void @test20
+// OGCG-LABEL: define {{.*}} void @test20
+void test20(void) {
+ struct { int t[10]; } t[10];
+
+ // CIR: cir.const #cir.int<380>
+ // LLVM: store i32 380
+ // OGCG: store i32 380
+ gi = __builtin_object_size(&t[0].t[5], 0);
+
+ // CIR: cir.const #cir.int<20>
+ // LLVM: store i32 20
+ // OGCG: store i32 20
+ gi = __builtin_object_size(&t[0].t[5], 1);
+
+ // CIR: cir.const #cir.int<380>
+ // LLVM: store i32 380
+ // OGCG: store i32 380
+ gi = __builtin_object_size(&t[0].t[5], 2);
+
+ // CIR: cir.const #cir.int<20>
+ // LLVM: store i32 20
+ // OGCG: store i32 20
+ gi = __builtin_object_size(&t[0].t[5], 3);
+}
+
+// CIR-LABEL: @test21
+// LLVM-LABEL: define {{.*}} void @test21
+// OGCG-LABEL: define {{.*}} void @test21
+void test21(void) {
+ struct { int t; } t;
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t + 1, 0);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t + 1, 1);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t + 1, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t + 1, 3);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t.t + 1, 0);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t.t + 1, 1);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t.t + 1, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t.t + 1, 3);
+}
+
+// CIR-LABEL: @test22
+// LLVM-LABEL: define {{.*}} void @test22
+// OGCG-LABEL: define {{.*}} void @test22
+void test22(void) {
+ struct { int t[10]; } t[10];
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[10], 0);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[10], 1);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[10], 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[10], 3);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[9].t[10], 0);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[9].t[10], 1);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[9].t[10], 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[9].t[10], 3);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[0] + sizeof(t), 0);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[0] + sizeof(t), 1);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[0] + sizeof(t), 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[0] + sizeof(t), 3);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 0);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 1);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 3);
+}
+
+struct Test23Ty { int a; int t[10]; };
+
+// CIR-LABEL: @test23
+// LLVM-LABEL: define {{.*}} void @test23
+// OGCG-LABEL: define {{.*}} void @test23
+void test23(struct Test23Ty *p) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(p, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(p, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(p, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(p, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(&p->a, 0);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&p->a, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(&p->a, 2);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&p->a, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(&p->t[5], 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(&p->t[5], 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(&p->t[5], 2);
+
+ // CIR: cir.const #cir.int<20>
+ // LLVM: store i32 20
+ // OGCG: store i32 20
+ gi = __builtin_object_size(&p->t[5], 3);
+}
+
+// CIR-LABEL: @test24
+// LLVM-LABEL: define {{.*}} void @test24
+// OGCG-LABEL: define {{.*}} void @test24
+void test24(void) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size((void*)0, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size((void*)0, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size((void*)0, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((void*)0, 3);
+}
+
+// CIR-LABEL: @test25
+// LLVM-LABEL: define {{.*}} void @test25
+// OGCG-LABEL: define {{.*}} void @test25
+void test25(void) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size((void*)0x1000, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size((void*)0x1000, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size((void*)0x1000, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size((void*)0x1000, 3);
+
+ // Skipping (void*)0 + 0x1000 tests - void pointer arithmetic NYI in CIR
+}
+
+// CIR-LABEL: @test26
+// LLVM-LABEL: define {{.*}} void @test26
+// OGCG-LABEL: define {{.*}} void @test26
+void test26(void) {
+ struct { int v[10]; } t[10];
+
+ // CIR: cir.const #cir.int<316>
+ // LLVM: store i32 316
+ // OGCG: store i32 316
+ gi = __builtin_object_size(&t[1].v[11], 0);
+
+ // CIR: cir.const #cir.int<312>
+ // LLVM: store i32 312
+ // OGCG: store i32 312
+ gi = __builtin_object_size(&t[1].v[12], 1);
+
+ // CIR: cir.const #cir.int<308>
+ // LLVM: store i32 308
+ // OGCG: store i32 308
+ gi = __builtin_object_size(&t[1].v[13], 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&t[1].v[14], 3);
+}
+
+struct Test27IncompleteTy;
+
+// CIR-LABEL: @test27
+// LLVM-LABEL: define {{.*}} void @test27
+// OGCG-LABEL: define {{.*}} void @test27
+void test27(struct Test27IncompleteTy *t) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(t, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(t, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(t, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(t, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(&test27, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(&test27, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(&test27, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(&test27, 3);
+}
+
+// CIR-LABEL: @test28
+// LLVM-LABEL: define {{.*}} void @test28
+// OGCG-LABEL: define {{.*}} void @test28
+void test28(void) {
+ struct { int v[10]; } t[10];
+
+ // CIR: cir.const #cir.int<360>
+ // LLVM: store i32 360
+ // OGCG: store i32 360
+ gi = __builtin_object_size((char*)((short*)(&t[1])), 0);
+
+ // CIR: cir.const #cir.int<360>
+ // LLVM: store i32 360
+ // OGCG: store i32 360
+ gi = __builtin_object_size((char*)((short*)(&t[1])), 1);
+
+ // CIR: cir.const #cir.int<360>
+ // LLVM: store i32 360
+ // OGCG: store i32 360
+ gi = __builtin_object_size((char*)((short*)(&t[1])), 2);
+
+ // CIR: cir.const #cir.int<360>
+ // LLVM: store i32 360
+ // OGCG: store i32 360
+ gi = __builtin_object_size((char*)((short*)(&t[1])), 3);
+
+ // CIR: cir.const #cir.int<356>
+ // LLVM: store i32 356
+ // OGCG: store i32 356
+ gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 0);
+
+ // CIR: cir.const #cir.int<36>
+ // LLVM: store i32 36
+ // OGCG: store i32 36
+ gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 1);
+
+ // CIR: cir.const #cir.int<356>
+ // LLVM: store i32 356
+ // OGCG: store i32 356
+ gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 2);
+
+ // CIR: cir.const #cir.int<36>
+ // LLVM: store i32 36
+ // OGCG: store i32 36
+ gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 3);
+}
+
+struct DynStructVar {
+ char fst[16];
+ char snd[];
+};
+
+struct DynStruct0 {
+ char fst[16];
+ char snd[0];
+};
+
+struct DynStruct1 {
+ char fst[16];
+ char snd[1];
+};
+
+struct StaticStruct {
+ char fst[16];
+ char snd[2];
+};
+
+// CIR-LABEL: @test29
+// LLVM-LABEL: define {{.*}} void @test29
+// OGCG-LABEL: define {{.*}} void @test29
+void test29(struct DynStructVar *dv, struct DynStruct0 *d0,
+ struct DynStruct1 *d1, struct StaticStruct *ss) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(dv->snd, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(dv->snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(dv->snd, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(dv->snd, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(d0->snd, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(d0->snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(d0->snd, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(d0->snd, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(d1->snd, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(d1->snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(d1->snd, 2);
+
+ // CIR: cir.const #cir.int<1>
+ // LLVM: store i32 1
+ // OGCG: store i32 1
+ gi = __builtin_object_size(d1->snd, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(ss->snd, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(ss->snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(ss->snd, 2);
+
+ // CIR: cir.const #cir.int<2>
+ // LLVM: store i32 2
+ // OGCG: store i32 2
+ gi = __builtin_object_size(ss->snd, 3);
+}
+
+// CIR-LABEL: @test30
+// LLVM-LABEL: define {{.*}} void @test30
+// OGCG-LABEL: define {{.*}} void @test30
+void test30(void) {
+ struct { struct DynStruct1 fst, snd; } *nested;
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(nested->fst.snd, 0);
+
+ // CIR: cir.const #cir.int<1>
+ // LLVM: store i32 1
+ // OGCG: store i32 1
+ gi = __builtin_object_size(nested->fst.snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(nested->fst.snd, 2);
+
+ // CIR: cir.const #cir.int<1>
+ // LLVM: store i32 1
+ // OGCG: store i32 1
+ gi = __builtin_object_size(nested->fst.snd, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(nested->snd.snd, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(nested->snd.snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(nested->snd.snd, 2);
+
+ // CIR: cir.const #cir.int<1>
+ // LLVM: store i32 1
+ // OGCG: store i32 1
+ gi = __builtin_object_size(nested->snd.snd, 3);
+
+ union { struct DynStruct1 d1; char c[1]; } *u;
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(u->c, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(u->c, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(u->c, 2);
+
+ // CIR: cir.const #cir.int<1>
+ // LLVM: store i32 1
+ // OGCG: store i32 1
+ gi = __builtin_object_size(u->c, 3);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(u->d1.snd, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(u->d1.snd, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(u->d1.snd, 2);
+
+ // CIR: cir.const #cir.int<1>
+ // LLVM: store i32 1
+ // OGCG: store i32 1
+ gi = __builtin_object_size(u->d1.snd, 3);
+}
+
+// CIR-LABEL: @test32
+// LLVM-LABEL: define {{.*}} i64 @test32
+// OGCG-LABEL: define {{.*}} i64 @test32
+static struct DynStructVar D32 = {
+ .fst = {},
+ .snd = { 0, 1, 2, },
+};
+unsigned long test32(void) {
+ // CIR: cir.const #cir.int<19>
+ // LLVM: store i64 19
+ // OGCG: ret i64 19
+ return __builtin_object_size(&D32, 1);
+}
+
+// CIR-LABEL: @test33
+// LLVM-LABEL: define {{.*}} i64 @test33
+// OGCG-LABEL: define {{.*}} i64 @test33
+static struct DynStructVar D33 = {
+ .fst = {},
+ .snd = {},
+};
+unsigned long test33(void) {
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i64 16
+ // OGCG: ret i64 16
+ return __builtin_object_size(&D33, 1);
+}
+
+// CIR-LABEL: @test34
+// LLVM-LABEL: define {{.*}} i64 @test34
+// OGCG-LABEL: define {{.*}} i64 @test34
+static struct DynStructVar D34 = {
+ .fst = {},
+};
+unsigned long test34(void) {
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i64 16
+ // OGCG: ret i64 16
+ return __builtin_object_size(&D34, 1);
+}
+
+// CIR-LABEL: @test35
+// LLVM-LABEL: define {{.*}} i64 @test35
+// OGCG-LABEL: define {{.*}} i64 @test35
+unsigned long test35(void) {
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i64 16
+ // OGCG: ret i64 16
+ return __builtin_object_size(&(struct DynStructVar){}, 1);
+}
+
+// CIR-LABEL: @test37
+// LLVM-LABEL: define {{.*}} i64 @test37
+// OGCG-LABEL: define {{.*}} i64 @test37
+struct Z { struct A { int x, y[]; } z; int a; int b[]; };
+static struct Z my_z = { .b = {1,2,3} };
+unsigned long test37(void) {
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i64 4
+ // OGCG: ret i64 4
+ return __builtin_object_size(&my_z.z, 1);
+}
+
+// CIR-LABEL: @PR30346
+// LLVM-LABEL: define {{.*}} void @PR30346
+// OGCG-LABEL: define {{.*}} void @PR30346
+void PR30346(void) {
+ struct sa_family_t {};
+ struct sockaddr {
+ struct sa_family_t sa_family;
+ char sa_data[14];
+ };
+
+ struct sockaddr *sa;
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(sa->sa_data, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1
+ gi = __builtin_object_size(sa->sa_data, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1
+ gi = __builtin_object_size(sa->sa_data, 2);
+
+ // CIR: cir.const #cir.int<14>
+ // LLVM: store i32 14
+ // OGCG: store i32 14
+ gi = __builtin_object_size(sa->sa_data, 3);
+}
+
+extern char incomplete_char_array[];
+
+// CIR-LABEL: @incomplete_and_function_types
+// LLVM-LABEL: define {{.*}} void @incomplete_and_function_types
+// OGCG-LABEL: define {{.*}} void @incomplete_and_function_types
+void incomplete_and_function_types(void) {
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0
+ // OGCG: call i64 @llvm.objectsize.i64.p0
+ gi = __builtin_object_size(incomplete_char_array, 0);
+
+ // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0
+ // OGCG: call i64 @llvm.objectsize.i64.p0
+ gi = __builtin_object_size(incomplete_char_array, 1);
+
+ // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0
+ // OGCG: call i64 @llvm.objectsize.i64.p0
+ gi = __builtin_object_size(incomplete_char_array, 2);
+
+ // CIR: cir.const #cir.int<0>
+ // LLVM: store i32 0
+ // OGCG: store i32 0
+ gi = __builtin_object_size(incomplete_char_array, 3);
+}
+
+// CIR-LABEL: @deeply_nested
+// LLVM-LABEL: define {{.*}} void @deeply_nested
+// OGCG-LABEL: define {{.*}} void @deeply_nested
+void deeply_nested(void) {
+ struct {
+ struct {
+ struct {
+ struct {
+ int e[2];
+ char f;
+ } d[2];
+ } c[2];
+ } b[2];
+ } *a;
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&a->b[1].c[1].d[1].e[1], 1);
+
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size(&a->b[1].c[1].d[1].e[1], 3);
+}
diff --git a/clang/test/CIR/CodeGen/object-size.cpp b/clang/test/CIR/CodeGen/object-size.cpp
new file mode 100644
index 0000000..b60e245
--- /dev/null
+++ b/clang/test/CIR/CodeGen/object-size.cpp
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// C++-specific tests for __builtin_object_size
+
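+// Note: casting to a base class can adjust the pointer (e.g. (B*)&c in test1
+// lands at offset 4 of the 8-byte C object), so the size the builtin reports
+// is the number of bytes remaining from the adjusted address.
+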
+int gi;
+
+// CIR-LABEL: @_Z5test1v
+// LLVM-LABEL: define{{.*}} void @_Z5test1v()
+// OGCG-LABEL: define{{.*}} void @_Z5test1v()
+void test1() {
+ // Guaranteeing that our cast removal logic doesn't break more interesting
+ // cases.
+ struct A { int a; };
+ struct B { int b; };
+ struct C: public A, public B {};
+
+ C c;
+
+ // CIR: cir.const #cir.int<8>
+ // LLVM: store i32 8
+ // OGCG: store i32 8
+ gi = __builtin_object_size(&c, 0);
+ // CIR: cir.const #cir.int<8>
+ // LLVM: store i32 8
+ // OGCG: store i32 8
+ gi = __builtin_object_size((A*)&c, 0);
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size((B*)&c, 0);
+
+ // CIR: cir.const #cir.int<8>
+ // LLVM: store i32 8
+ // OGCG: store i32 8
+ gi = __builtin_object_size((char*)&c, 0);
+ // CIR: cir.const #cir.int<8>
+ // LLVM: store i32 8
+ // OGCG: store i32 8
+ gi = __builtin_object_size((char*)(A*)&c, 0);
+ // CIR: cir.const #cir.int<4>
+ // LLVM: store i32 4
+ // OGCG: store i32 4
+ gi = __builtin_object_size((char*)(B*)&c, 0);
+}
+
+// CIR-LABEL: @_Z5test2v()
+// LLVM-LABEL: define{{.*}} void @_Z5test2v()
+// OGCG-LABEL: define{{.*}} void @_Z5test2v()
+void test2() {
+ struct A { char buf[16]; };
+ struct B : A {};
+ struct C { int i; B bs[1]; } *c;
+
+ // CIR: cir.objsize max nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 0);
+ // CIR: cir.objsize max nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 1);
+ // CIR: cir.objsize min nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 2);
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i32 16
+ // OGCG: store i32 16
+ gi = __builtin_object_size(&c->bs[0], 3);
+
+ // NYI: DerivedToBase cast
+ // gi = __builtin_object_size((A*)&c->bs[0], 0);
+
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i32 16
+ // OGCG: store i32 16
+ gi = __builtin_object_size((A*)&c->bs[0], 1);
+
+ // NYI: DerivedToBase cast
+ // gi = __builtin_object_size((A*)&c->bs[0], 2);
+
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i32 16
+ // OGCG: store i32 16
+ gi = __builtin_object_size((A*)&c->bs[0], 3);
+
+ // CIR: cir.objsize max nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0].buf[0], 0);
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i32 16
+ // OGCG: store i32 16
+ gi = __builtin_object_size(&c->bs[0].buf[0], 1);
+ // CIR: cir.objsize min nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i
+ // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0].buf[0], 2);
+ // CIR: cir.const #cir.int<16>
+ // LLVM: store i32 16
+ // OGCG: store i32 16
+ gi = __builtin_object_size(&c->bs[0].buf[0], 3);
+}
diff --git a/clang/test/CIR/IR/objsize.cir b/clang/test/CIR/IR/objsize.cir
new file mode 100644
index 0000000..bc24551
--- /dev/null
+++ b/clang/test/CIR/IR/objsize.cir
@@ -0,0 +1,89 @@
+// Test that the cir.objsize operation parses and prints correctly (roundtrip)
+// with all possible combinations of optional attributes.
+
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
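+// Judging by the lowerings exercised in clang/test/CIR/CodeGen/object-size.c:
+// `min`/`max` select the minimum or maximum object size, `nullunknown` treats
+// a null pointer as pointing to an object of unknown size, and `dynamic`
+// allows the size to be computed at runtime; these correspond to the i1
+// arguments of @llvm.objectsize.
+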
+!u64i = !cir.int<u, 64>
+!void = !cir.void
+
+module {
+ cir.func @test_max(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize max %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_max_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize max nullunknown %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_max_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize max dynamic %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_max_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize max nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_min(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize min %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_min_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize min nullunknown %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_min_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize min dynamic %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+
+ cir.func @test_min_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+ %0 = cir.objsize min nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i
+ cir.return %0 : !u64i
+ }
+}
+
+// CHECK: cir.func @test_max(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize max %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_max_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize max nullunknown %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_max_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize max dynamic %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_max_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize max nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_min(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize min %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_min_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize min nullunknown %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_min_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize min dynamic %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
+
+// CHECK: cir.func @test_min_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i {
+// CHECK: %0 = cir.objsize min nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i
+// CHECK: cir.return %0 : !u64i
+// CHECK: }
diff --git a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl
index 7aeb877..b0abaed 100644
--- a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl
+++ b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl
@@ -24,4 +24,3 @@ void foo(uint Idx : SV_DispatchThreadID) {}
[shader("compute")]
[numthreads(8,8,1)]
void bar(uint2 Idx : SV_DispatchThreadID) {}
-
diff --git a/clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl b/clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl
new file mode 100644
index 0000000..96d5b99
--- /dev/null
+++ b/clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan-vertex -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-vertex -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx
+
+// CHECK-SPIRV-DAG: @AAA0 = external hidden thread_local addrspace(7) externally_initialized constant float, !spirv.Decorations ![[#METADATA_0:]]
+// CHECK-SPIRV-DAG: @B0 = external hidden thread_local addrspace(7) externally_initialized constant i32, !spirv.Decorations ![[#METADATA_2:]]
+// CHECK-SPIRV-DAG: @CC0 = external hidden thread_local addrspace(7) externally_initialized constant <2 x float>, !spirv.Decorations ![[#METADATA_4:]]
+
+
+// FIXME: replace `float2 c` with a matrix when available.
+void main(float a : AAA, int b : B, float2 c : CC) {
+ float tmp = a + b + c.x + c.y;
+}
+// CHECK-SPIRV: define internal spir_func void @_Z4mainfiDv2_f(float noundef nofpclass(nan inf) %a, i32 noundef %b, <2 x float> noundef nofpclass(nan inf) %c) #0 {
+
+// CHECK: define void @main()
+
+// CHECK-DXIL: %AAA0 = call float @llvm.dx.load.input.f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %B0 = call i32 @llvm.dx.load.input.i32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %CC0 = call <2 x float> @llvm.dx.load.input.v2f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: call void @_Z4mainfiDv2_f(float %AAA0, i32 %B0, <2 x float> %CC0)
+
+// CHECK-SPIRV: %[[#AAA0:]] = load float, ptr addrspace(7) @AAA0, align 4
+// CHECK-SPIRV: %[[#B0:]] = load i32, ptr addrspace(7) @B0, align 4
+// CHECK-SPIRV: %[[#CC0:]] = load <2 x float>, ptr addrspace(7) @CC0, align 8
+// CHECK-SPIRV: call spir_func void @_Z4mainfiDv2_f(float %[[#AAA0]], i32 %[[#B0]], <2 x float> %[[#CC0]]) [ "convergencectrl"(token %0) ]
+
+
+// CHECK-SPIRV-DAG: ![[#METADATA_0]] = !{![[#METADATA_1:]]}
+// CHECK-SPIRV-DAG: ![[#METADATA_2]] = !{![[#METADATA_3:]]}
+// CHECK-SPIRV-DAG: ![[#METADATA_4]] = !{![[#METADATA_5:]]}
+
+// CHECK-SPIRV-DAG: ![[#METADATA_1]] = !{i32 30, i32 0}
+// CHECK-SPIRV-DAG: ![[#METADATA_3]] = !{i32 30, i32 1}
+// CHECK-SPIRV-DAG: ![[#METADATA_5]] = !{i32 30, i32 2}
+// | `- Location index
+// `-> Decoration "Location"
diff --git a/clang/test/CodeGenHLSL/semantics/semantic.array.hlsl b/clang/test/CodeGenHLSL/semantics/semantic.array.hlsl
new file mode 100644
index 0000000..b2cb3da
--- /dev/null
+++ b/clang/test/CodeGenHLSL/semantics/semantic.array.hlsl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx
+
+struct S0 {
+ float4 position[2];
+ float4 color;
+};
+
+// CHECK: %struct.S0 = type { [2 x <4 x float>], <4 x float> }
+
+// CHECK-SPIRV: @A0 = external hidden thread_local addrspace(7) externally_initialized constant [2 x <4 x float>], !spirv.Decorations ![[#MD_0:]]
+// CHECK-SPIRV: @A2 = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations ![[#MD_2:]]
+
+// CHECK: define void @main0()
+// CHECK-DXIL: %A0 = call [2 x <4 x float>] @llvm.dx.load.input.a2v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %[[#TMP0:]] = insertvalue %struct.S0 poison, [2 x <4 x float>] %A0, 0
+// CHECK-DXIL: %A2 = call <4 x float> @llvm.dx.load.input.v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %[[#TMP1:]] = insertvalue %struct.S0 %[[#TMP0]], <4 x float> %A2, 1
+
+// CHECK-SPIRV: %[[#A0:]] = load [2 x <4 x float>], ptr addrspace(7) @A0, align 16
+// CHECK-SPIRV: %[[#TMP0:]] = insertvalue %struct.S0 poison, [2 x <4 x float>] %[[#A0]], 0
+// CHECK-SPIRV: %[[#A2:]] = load <4 x float>, ptr addrspace(7) @A2, align 16
+// CHECK-SPIRV: %[[#TMP1:]] = insertvalue %struct.S0 %[[#TMP0]], <4 x float> %[[#A2]], 1
+
+// CHECK: %[[#ARG:]] = alloca %struct.S0, align 16
+// CHECK: store %struct.S0 %[[#TMP1]], ptr %[[#ARG]], align 16
+// CHECK-DXIL: call void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+[shader("pixel")]
+void main0(S0 p : A) {
+ float tmp = p.position[0] + p.position[1] + p.color;
+}
+
+// CHECK-SPIRV: ![[#MD_0]] = !{![[#MD_1:]]}
+// CHECK-SPIRV: ![[#MD_1]] = !{i32 30, i32 0}
+// CHECK-SPIRV: ![[#MD_2]] = !{![[#MD_3:]]}
+// CHECK-SPIRV: ![[#MD_3]] = !{i32 30, i32 2}
diff --git a/clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl b/clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl
new file mode 100644
index 0000000..733cf3a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx
+// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv
+
+struct S0 {
+ uint Idx : SV_DispatchThreadID;
+};
+
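+// As the checks below illustrate, a semantic attached to a struct field is
+// materialized by calling the matching intrinsic per element and rebuilding
+// the struct with insertvalue; the user entry point then receives a pointer
+// to the stored result.
+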
+// CHECK: define void @main0()
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 0)
+// CHECK: %[[#TMP:]] = insertvalue %struct.S0 poison, i32 %[[#ID:]], 0
+// CHECK: %[[#ARG:]] = alloca %struct.S0, align 8
+// CHECK: store %struct.S0 %[[#TMP]], ptr %[[#ARG]], align 4
+// CHECK-DXIL: call void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+[shader("compute")]
+[numthreads(8,8,1)]
+void main0(S0 p) {}
+
+struct S1 {
+ uint2 a : SV_DispatchThreadID;
+ uint2 b : SV_GroupThreadID;
+};
+
+// CHECK: define void @main1()
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 0)
+// CHECK: %[[#AX_:]] = insertelement <2 x i32> poison, i32 %[[#ID]], i64 0
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 1)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 1)
+// CHECK: %[[#AXY:]] = insertelement <2 x i32> %[[#AX_]], i32 %[[#ID]], i64 1
+// CHECK: %[[#S1A_:]] = insertvalue %struct.S1 poison, <2 x i32> %[[#AXY]], 0
+// CHECK-DXIL: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 0)
+// CHECK-SPIRV: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 0)
+// CHECK: %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0
+// CHECK-DXIL: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 1)
+// CHECK-SPIRV: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 1)
+// CHECK: %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
+// CHECK: %[[#S1AB:]] = insertvalue %struct.S1 %[[#S1A_]], <2 x i32> %[[#ID_XYZ:]], 1
+// CHECK: %[[#ARG:]] = alloca %struct.S1, align 8
+// CHECK: store %struct.S1 %[[#S1AB]], ptr %[[#ARG]], align 8
+// CHECK-DXIL: call void @{{.*}}main1{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main1{{.*}}(ptr %[[#ARG]])
+[shader("compute")]
+[numthreads(8,8,1)]
+void main1(S1 p) {}
+
+struct S2C {
+ uint2 b : SV_GroupThreadID;
+};
+
+struct S2 {
+ uint a : SV_DispatchThreadID;
+ S2C child;
+};
+
+// CHECK: define void @main2()
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 0)
+// CHECK: %[[#S2A_:]] = insertvalue %struct.S2 poison, i32 %[[#ID:]], 0
+
+// CHECK-DXIL: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 0)
+// CHECK-SPIRV: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 0)
+// CHECK: %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0
+// CHECK-DXIL: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 1)
+// CHECK-SPIRV: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 1)
+// CHECK: %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
+// CHECK: %[[#S2C:]] = insertvalue %struct.S2C poison, <2 x i32> %[[#ID_XY:]], 0
+
+// CHECK: %[[#S2AB:]] = insertvalue %struct.S2 %[[#S2A_]], %struct.S2C %[[#S2V:]], 1
+// CHECK: %[[#ARG:]] = alloca %struct.S2, align 8
+// CHECK: store %struct.S2 %[[#S2AB]], ptr %[[#ARG]], align 1
+// CHECK-DXIL: call void @{{.*}}main2{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main2{{.*}}(ptr %[[#ARG]])
+[shader("compute")]
+[numthreads(8,8,1)]
+void main2(S2 p) {}
diff --git a/clang/test/ParserHLSL/semantic_parsing.hlsl b/clang/test/ParserHLSL/semantic_parsing.hlsl
index 726dead..bff7bd0 100644
--- a/clang/test/ParserHLSL/semantic_parsing.hlsl
+++ b/clang/test/ParserHLSL/semantic_parsing.hlsl
@@ -12,30 +12,33 @@ void Pony(int GI : SV_IWantAPony) { }
// expected-note@+1 {{to match this '('}}
void SuperPony(int GI : 0) { }
-// expected-error@+1 {{unknown HLSL semantic '_'}}
+// '_' is a valid C++ identifier.
void MegaPony(int GI : _) { }
-// expected-error@+1 {{unknown HLSL semantic 'A0A'}}
+void GargantuanPony(int GI : _1) { }
+
void CoolPony(int GI : A0A0) { }
-// expected-error@+1 {{unknown HLSL semantic 'A_'}}
void NicePony(int GI : A_0) { }
-// expected-error@+1 {{unknown HLSL semantic 'A'}}
void CutePony(int GI : A00) { }
-// expected-error@+3 {{unknown HLSL semantic 'A'}}
// expected-error@+2 {{expected ')'}}
// expected-note@+1 {{to match this '('}}
void DoublePony(int GI : A00 B) { }
-// expected-error@+1 {{unknown HLSL semantic 'é'}}
-void BigPony(int GI : é) { }
+// Unicode can be used:
+// https://timsong-cpp.github.io/cppwp/n3337/charname.allowed
+void FrenchPony(int GI : garçon_de_café) { }
+void UnicodePony(int GI : ℮) { }
+
+// Since P1949, it seems emojis are not allowed, even if they are in the
+// range mentioned in N3337.
+// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p1949r7.html
// expected-error@+2 {{unexpected character <U+1F60A>}}
// expected-error@+1 {{expected HLSL Semantic identifier}}
void UTFPony(int GI : 😊) { }
-// expected-error@+2 {{character <U+1F60A> not allowed in an identifier}}
-// expected-error@+1 {{unknown HLSL semantic 'PonyWithA😊'}}
+// expected-error@+1 {{character <U+1F60A> not allowed in an identifier}}
void SmilingPony(int GI : PonyWithA😊) { }
diff --git a/clang/test/SemaCXX/dependent-switch-case.cpp b/clang/test/SemaCXX/dependent-switch-case.cpp
new file mode 100644
index 0000000..bbeab3a
--- /dev/null
+++ b/clang/test/SemaCXX/dependent-switch-case.cpp
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -std=c++20 %s -verify
+// RUN: %clang_cc1 -std=c++20 %s -verify -fexperimental-new-constant-interpreter
+
+constexpr bool e(int){switch(0)0=0:return t(;} // expected-error {{expression is not assignable}} \
+ // expected-error {{expected 'case' keyword before expression}} \
+ // expected-error {{expected expression}}
diff --git a/clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl b/clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl
new file mode 100644
index 0000000..fdba6f6
--- /dev/null
+++ b/clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -fsyntax-only -hlsl-entry main -verify %s
+
+typedef float t_f : SEMANTIC; // expected-warning{{'SEMANTIC' attribute only applies to parameters, non-static data members, and functions}}
+
+struct semantic_on_struct : SEMANTIC { // expected-error{{expected class name}}
+ float a;
+};
+
+struct s_fields_multiple_semantics {
+ float a : semantic_a : semantic_c; // expected-error{{use of undeclared identifier 'semantic_c'}}
+ float b : semantic_b;
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+ float a : SEM_A; // expected-warning{{'SEM_A' attribute only applies to parameters, non-static data members, and functions}}
+}
diff --git a/clang/test/SemaHLSL/Semantics/semantics-valid.hlsl b/clang/test/SemaHLSL/Semantics/semantics-valid.hlsl
new file mode 100644
index 0000000..1e6bae4
--- /dev/null
+++ b/clang/test/SemaHLSL/Semantics/semantics-valid.hlsl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -hlsl-entry CSMain -x hlsl -finclude-default-header -ast-dump -o - %s | FileCheck %s
+
+struct s_fields {
+ float a : semantic_a;
+ float b : semantic_b;
+// CHECK: |-CXXRecordDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-3]]:8 struct s_fields definition
+// CHECK: | |-FieldDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:9 a 'float'
+// CHECK: | | `-HLSLUserSemanticAttr 0x{{[0-9a-fA-F]+}} <col:13>
+// CHECK: | `-FieldDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:9 b 'float'
+// CHECK: | `-HLSLUserSemanticAttr 0x{{[0-9a-fA-F]+}} <col:13>
+};
+
+float fn_foo1(float a : a, float b : b) : sem_ret { return 1.0f; }
+// CHECK: |-FunctionDecl {{.*}} <{{.*}}> col:7 fn_foo1 'float (float, float)'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <{{.*}}> col:21 a 'float'
+// CHECK-NEXT: | | `-HLSLUserSemanticAttr {{.*}} <{{.*}}>
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <{{.*}}> col:34 b 'float'
+// CHECK-NEXT: | | `-HLSLUserSemanticAttr {{.*}} <{{.*}}>
+// CHECK-NEXT: | |-CompoundStmt {{.*}} <{{.*}}>
+// CHECK-NEXT: | | `-ReturnStmt {{.*}} <{{.*}}>
+// CHECK-NEXT: | | `-FloatingLiteral {{.*}} <{{.*}}> 'float' 1.000000e+00
+// CHECK-NEXT: | `-HLSLUserSemanticAttr {{.*}} <{{.*}}>
+float fn_foo2(float a : a, float b : b) : sem_ret : also_ret { return 1.0f; }
+// CHECK: `-FunctionDecl {{.*}} <{{.*}}> col:7 fn_foo2 'float (float, float)'
+// CHECK-NEXT: |-ParmVarDecl {{.*}} <{{.*}}> col:21 a 'float'
+// CHECK-NEXT: | `-HLSLUserSemanticAttr {{.*}} <{{.*}}>
+// CHECK-NEXT: |-ParmVarDecl {{.*}} <{{.*}}> col:34 b 'float'
+// CHECK-NEXT: | `-HLSLUserSemanticAttr {{.*}} <{{.*}}>
+// CHECK-NEXT: |-CompoundStmt {{.*}} <{{.*}}>
+// CHECK-NEXT: | `-ReturnStmt {{.*}} <{{.*}}>
+// CHECK-NEXT: | `-FloatingLiteral {{.*}} <{{.*}}> 'float' 1.000000e+00
+// CHECK-NEXT: |-HLSLUserSemanticAttr {{.*}} <{{.*}}>
+// CHECK-NEXT: `-HLSLUserSemanticAttr {{.*}} <{{.*}}>
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index 9692d6e..3fcb558 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -1179,6 +1179,12 @@ TEST_P(ASTMatchersTest, PredefinedExpr) {
has(stringLiteral()))));
}
+TEST_P(ASTMatchersTest, FileScopeAsmDecl) {
+ EXPECT_TRUE(matches("__asm(\"nop\");", fileScopeAsmDecl()));
+ EXPECT_TRUE(
+ notMatches("void f() { __asm(\"mov al, 2\"); }", fileScopeAsmDecl()));
+}
+
TEST_P(ASTMatchersTest, AsmStatement) {
EXPECT_TRUE(matches("void foo() { __asm(\"mov al, 2\"); }", asmStmt()));
}
@@ -2442,7 +2448,8 @@ TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureOfReferenceType) {
"int main() {"
" int a;"
" f(a);"
- "}", matcher));
+ "}",
+ matcher));
EXPECT_FALSE(matches("template <class ...T> void f(T &...args) {"
" [...args = args] () mutable {"
" }();"
@@ -2450,7 +2457,8 @@ TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureOfReferenceType) {
"int main() {"
" int a;"
" f(a);"
- "}", matcher));
+ "}",
+ matcher));
}
TEST_P(ASTMatchersTest, IsDerivedFromRecursion) {
@@ -2628,7 +2636,7 @@ TEST(ASTMatchersTestObjC, ObjCStringLiteral) {
" [Test someFunction:@\"Ola!\"]; "
"}\n"
"@end ";
- EXPECT_TRUE(matchesObjC(Objc1String, objcStringLiteral()));
+ EXPECT_TRUE(matchesObjC(Objc1String, objcStringLiteral()));
}
TEST(ASTMatchersTestObjC, ObjCDecls) {
diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp
index e544c89..3b18aa83 100644
--- a/clang/unittests/Support/TimeProfilerTest.cpp
+++ b/clang/unittests/Support/TimeProfilerTest.cpp
@@ -186,7 +186,8 @@ std::string buildTraceGraph(StringRef Json) {
} // namespace
-TEST(TimeProfilerTest, ConstantEvaluationCxx20) {
+// FIXME: Flaky test. See https://github.com/llvm/llvm-project/pull/138613
+TEST(TimeProfilerTest, DISABLED_ConstantEvaluationCxx20) {
std::string Code = R"(
void print(double value);
diff --git a/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c b/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c
index 08a04fc..fc83b21 100644
--- a/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c
+++ b/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c
@@ -3,12 +3,12 @@
// Default compiler instrumentation works with any shadow base (dynamic or fixed).
// RUN: %clang_hwasan %s -o %t
// RUN: %run %t
-// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t
+// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t 2>%t.out || (cat %t.out | FileCheck %s)
// RUN: env HWASAN_OPTIONS=fixed_shadow_base=4398046511104 %run %t
//
// If -hwasan-mapping-offset is set, then the fixed_shadow_base needs to match.
// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=263878495698944 -o %t
-// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t
+// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t 2>%t.out || (cat %t.out | FileCheck %s)
// RUN: env HWASAN_OPTIONS=fixed_shadow_base=4398046511104 not %run %t
// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=4398046511104 -o %t
@@ -26,6 +26,8 @@
//
// UNSUPPORTED: android
+// CHECK: FATAL: HWAddressSanitizer: Shadow range {{.*}} is not available
+
#include <assert.h>
#include <sanitizer/allocator_interface.h>
#include <sanitizer/hwasan_interface.h>
diff --git a/libc/src/__support/CPP/type_traits/is_destructible.h b/libc/src/__support/CPP/type_traits/is_destructible.h
index 7ada223..dc5e62b 100644
--- a/libc/src/__support/CPP/type_traits/is_destructible.h
+++ b/libc/src/__support/CPP/type_traits/is_destructible.h
@@ -15,6 +15,7 @@
#include "src/__support/CPP/type_traits/remove_all_extents.h"
#include "src/__support/CPP/type_traits/true_type.h"
#include "src/__support/CPP/type_traits/type_identity.h"
+#include "src/__support/CPP/utility/declval.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp
index c089a14..4740067 100644
--- a/libc/startup/baremetal/arm/start.cpp
+++ b/libc/startup/baremetal/arm/start.cpp
@@ -131,6 +131,32 @@ namespace LIBC_NAMESPACE_DECL {
__arm_wsr("CPSR_c", 0x13); // SVC
#endif
+#ifdef __ARM_FP
+// Enable FPU
+#if __ARM_ARCH_PROFILE == 'M'
+ // Based on
+ // https://developer.arm.com/documentation/dui0646/c/Cortex-M7-Peripherals/Floating-Point-Unit/Enabling-the-FPU
+ // Set CPACR cp10 and cp11
+ auto cpacr = (volatile uint32_t *const)0xE000ED88;
+ *cpacr |= (0xF << 20);
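+  // (0xF << 20) sets CPACR bits 23:20, i.e. full access to CP10 and CP11;
+  // the A/R-profile branch below sets the same fields via CP15.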
+ __dsb(0xF);
+ __isb(0xF);
+#elif __ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R'
+ // Based on
+ // https://developer.arm.com/documentation/dui0472/m/Compiler-Coding-Practices/Enabling-NEON-and-FPU-for-bare-metal
+ // Set CPACR cp10 and cp11
+ uint32_t cpacr = __arm_rsr("p15:0:c1:c0:2");
+ cpacr |= (0xF << 20);
+ __arm_wsr("p15:0:c1:c0:2", cpacr);
+ __isb(0xF);
+ // Set FPEXC.EN
+ uint32_t fpexc;
+ __asm__ __volatile__("vmrs %0, FPEXC" : "=r"(fpexc) : :);
+ fpexc |= (1 << 30);
+ __asm__ __volatile__("vmsr FPEXC, %0" : : "r"(fpexc) :);
+#endif
+#endif
+
// Perform the equivalent of scatterloading
LIBC_NAMESPACE::memcpy(__data_start, __data_source,
reinterpret_cast<uintptr_t>(__data_size));
diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst
index 7499613..9ecd390 100644
--- a/llvm/docs/SPIRVUsage.rst
+++ b/llvm/docs/SPIRVUsage.rst
@@ -241,6 +241,8 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e
- Adds predicated load and store instructions that conditionally read from or write to memory based on a boolean predicate.
* - ``SPV_KHR_maximal_reconvergence``
- Adds execution mode and capability to enable maximal reconvergence.
+ * - ``SPV_ALTERA_blocking_pipes``
+ - Adds new pipe read and write functions that have blocking semantics instead of the non-blocking semantics of the existing pipe read/write functions.
SPIR-V representation in LLVM IR
================================
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 221d8f1..f585257 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1331,8 +1331,8 @@ public:
bool SplitDst =
TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
TargetLowering::TypeSplitVector;
- if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
- DstVTy->getElementCount().isVector()) {
+ if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isKnownEven() &&
+ DstVTy->getElementCount().isKnownEven()) {
Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
const T *TTI = thisT();
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 268025e7..9d6038d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -297,6 +297,10 @@ private:
/// \pre \p U is a call instruction.
bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateIntrinsic(
+ const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+ const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 9924b90..d7db935 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -176,4 +176,10 @@ def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, l
def int_dx_group_memory_barrier_with_group_sync
: DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
+
+def int_dx_load_input
+ : DefaultAttrsIntrinsic<[llvm_any_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i8_ty,
+ llvm_i32_ty],
+ [IntrConvergent]>;
}
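
The new intrinsic is overloaded on its result type (llvm_any_ty) and takes three i32 operands, an i8, and a final i32. A minimal sketch of emitting it through IRBuilder; the operand values below are placeholders, and their semantic meaning (input ID, row, column index, and so on) is an assumption rather than something this patch spells out:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsDirectX.h"
    using namespace llvm;
    // Emit a float-typed dx.load.input call with placeholder operands.
    Value *emitLoadInput(IRBuilder<> &B) {
      return B.CreateIntrinsic(B.getFloatTy(), Intrinsic::dx_load_input,
                               {B.getInt32(0), B.getInt32(0), B.getInt32(0),
                                B.getInt8(0), B.getInt32(0)});
    }
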
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4fd2204..be1b51f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2821,20 +2821,34 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+ return translateIntrinsic(CI, ID, MIRBuilder,
+ IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
+/// Translate a call to an intrinsic.
+/// If TLI->getTgtMemIntrinsic() returned true for this call,
+/// TgtMemIntrinsicInfo points to the IntrinsicInfo it populated; otherwise
+/// this pointer is null.
+bool IRTranslator::translateIntrinsic(
+ const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+ const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
ArrayRef<Register> ResultRegs;
- if (!CI.getType()->isVoidTy())
- ResultRegs = getOrCreateVRegs(CI);
+ if (!CB.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CB);
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
- if (isa<FPMathOperator>(CI))
- MIB->copyIRFlags(CI);
+ if (isa<FPMathOperator>(CB))
+ MIB->copyIRFlags(CB);
- for (const auto &Arg : enumerate(CI.args())) {
+ for (const auto &Arg : enumerate(CB.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
- if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
// imm arguments are more convenient than cimm (and realistically
// probably sufficient), so use them.
@@ -2863,29 +2877,33 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
// Add a MachineMemOperand if it is a target mem intrinsic.
- TargetLowering::IntrinsicInfo Info;
- // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
- if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- Align Alignment = Info.align.value_or(
- DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
- LLT MemTy = Info.memVT.isSimple()
- ? getLLTForMVT(Info.memVT.getSimpleVT())
- : LLT::scalar(Info.memVT.getStoreSizeInBits());
+ if (TgtMemIntrinsicInfo) {
+ const Function *F = CB.getCalledFunction();
+
+ Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign(
+ TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext())));
+ LLT MemTy =
+ TgtMemIntrinsicInfo->memVT.isSimple()
+ ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT())
+ : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits());
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
// didn't yield anything useful.
MachinePointerInfo MPI;
- if (Info.ptrVal)
- MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
- else if (Info.fallbackAddressSpace)
- MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ if (TgtMemIntrinsicInfo->ptrVal) {
+ MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal,
+ TgtMemIntrinsicInfo->offset);
+ } else if (TgtMemIntrinsicInfo->fallbackAddressSpace) {
+ MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace);
+ }
MIB.addMemOperand(MF->getMachineMemOperand(
- MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(),
- /*Ranges=*/nullptr, Info.ssid, Info.order, Info.failureOrder));
+ MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata(),
+ /*Ranges=*/nullptr, TgtMemIntrinsicInfo->ssid,
+ TgtMemIntrinsicInfo->order, TgtMemIntrinsicInfo->failureOrder));
}
- if (CI.isConvergent()) {
- if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ if (CB.isConvergent()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
Register TokenReg = getOrCreateVReg(*Token);
MIB.addUse(TokenReg, RegState::Implicit);
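
The point of the extraction is that translateIntrinsic no longer queries the target itself: the caller computes the IntrinsicInfo once and passes it as a nullable pointer, where null means "not a target memory intrinsic". The call-site shape, as a fragment of the member context above (not standalone code):

    TargetLowering::IntrinsicInfo Info;
    bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
    translateIntrinsic(CI, ID, MIRBuilder, IsTgtMemIntrinsic ? &Info : nullptr);
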
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fa0c899..9961c98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3526,8 +3526,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
+ for (BasicBlock *Dest : I.getIndirectDests()) {
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
Target->setIsInlineAsmBrIndirectTarget();
// If we introduce a type of asm goto statement that is permitted to use an
@@ -5313,18 +5312,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
DAG.setRoot(OutChain);
}
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
- // Ignore the callsite's attributes. A specific call site may be marked with
- // readnone, but the lowering code will expect the chain based on the
- // definition.
+/// Check if this intrinsic call depends on the chain (1st return value)
+/// and if it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad =
HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
+ return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
@@ -5336,17 +5343,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemIntrinsic
- TargetLowering::IntrinsicInfo Info;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
- DAG.getMachineFunction(),
- Intrinsic);
-
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
- Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+ TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
@@ -5369,13 +5369,85 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
+ if (std::optional<OperandBundleUse> Bundle =
+ I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ Value *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ assert(Ops.back().getValueType() != MVT::Glue &&
+ "Did not expect another glue node here.");
+ ConvControlToken =
+ DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+
+ return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+ bool HasChain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+ const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops,
+ const SDVTList &VTs) {
+ if (!HasChain)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ if (!IntrinsicVT.isVoidTy())
+ return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+}
+
+/// Set the root or pending loads, and add alignment/range assertions if needed.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+ bool HasChain,
+ bool OnlyLoad,
+ SDValue Result) {
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (I.getType()->isVoidTy())
+ return Result;
+
+ if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) {
+ // Insert `assertalign` node if there's an alignment.
+ Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ } else if (!isa<VectorType>(I.getType())) {
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+ }
+
+ return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ // Info is set by getTgtMemIntrinsic
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtMemIntrinsic =
+ TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
+
+ SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+ I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
@@ -5386,19 +5458,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
- if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
- auto *Token = Bundle->Inputs[0].get();
- SDValue ConvControlToken = getValue(Token);
- assert(Ops.back().getValueType() != MVT::Glue &&
- "Did not expected another glue node here.");
- ConvControlToken =
- DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
- Ops.push_back(ConvControlToken);
- }
-
// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
- if (IsTgtIntrinsic) {
+ if (IsTgtMemIntrinsic) {
// This is target intrinsic that touches memory
//
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5418,34 +5480,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Info.ssid, Info.order, Info.failureOrder);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO);
- } else if (!HasChain) {
- Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
- } else if (!I.getType()->isVoidTy()) {
- Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
- Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
}
- if (HasChain) {
- SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
- if (OnlyLoad)
- PendingLoads.push_back(Chain);
- else
- DAG.setRoot(Chain);
- }
-
- if (!I.getType()->isVoidTy()) {
- if (!isa<VectorType>(I.getType()))
- Result = lowerRangeToAssertZExt(DAG, I, Result);
-
- MaybeAlign Alignment = I.getRetAlign();
-
- // Insert `assertalign` node if there's an alignment.
- if (InsertAssertAlign && Alignment) {
- Result =
- DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
- }
- }
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
setValue(&I, Result);
}
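
After this refactoring, visitTargetIntrinsic reads as a short pipeline over the new helpers:

1. getTargetIntrinsicCallProperties computes {HasChain, OnlyLoad} from the callee's attributes.
2. getTgtMemIntrinsic optionally fills an IntrinsicInfo.
3. getTargetIntrinsicOperands builds the operand list (chain, intrinsic ID, arguments, convergence-control token).
4. getTargetIntrinsicVTList computes the result VTs, appending MVT::Other when chained.
5. The node is built, either as a memory-intrinsic node or via getTargetNonMemIntrinsicNode.
6. handleTargetIntrinsicRet updates the chain (root or pending loads) and attaches assertalign or range assertions.
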
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47e19f7..ed63bee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -727,6 +727,17 @@ private:
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+ std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
+ SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+ SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+ SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain,
+ ArrayRef<SDValue> Ops,
+ const SDVTList &VTs);
+ SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+ bool OnlyLoad, SDValue Result);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d08f9b9..40e6400 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -104,7 +105,6 @@
#include <vector>
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
@@ -1174,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE);
+ setTargetDAGCombine(ISD::CTPOP);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
@@ -11330,9 +11331,10 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
break;
}
+  // Note: Only v1i64 and v2i64 reach this lowering, and for those we prefer
+  // SVE when available, so it is safe to always override NEON here.
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
@@ -17554,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
// udot instruction.
if (SrcWidth * 4 <= DstWidth) {
if (all_of(I->users(), [&](auto *U) {
+ using namespace llvm::PatternMatch;
auto *SingleUser = cast<Instruction>(&*U);
if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
return true;
@@ -17825,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// into shift / and masks. For the moment we do this just for uitofp (not
// zext) to avoid issues with widening instructions.
if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) {
+ using namespace llvm::PatternMatch;
return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) &&
SI->getType()->getScalarSizeInBits() * 4 ==
SI->user_back()->getType()->getScalarSizeInBits();
@@ -27841,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) {
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
+static SDValue performCTPOPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ using namespace llvm::SDPatternMatch;
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ // ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask))
+ SDValue Mask;
+ if (!sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(Mask)))))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT MaskVT = Mask.getValueType();
+
+ if (VT.isVector() || !MaskVT.isFixedLengthVector() ||
+ MaskVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ EVT ReduceInVT =
+ EVT::getVectorVT(*DAG.getContext(), VT, MaskVT.getVectorElementCount());
+
+ SDLoc DL(N);
+  // Sign-extend so mask lanes are 0 or -1 (ZeroOrNegativeOneBooleanContent).
+ SDValue ExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, ReduceInVT, Mask);
+ SDValue NegPopCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, ExtMask);
+ return DAG.getNegative(NegPopCount, DL, VT);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -28186,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performScalarToVectorCombine(N, DCI, DAG);
case ISD::SHL:
return performSHLCombine(N, DCI, DAG);
+ case ISD::CTPOP:
+ return performCTPOPCombine(N, DCI, DAG);
}
return SDValue();
}
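
A worked instance of the new CTPOP combine: for a v4i1 mask m = (1,0,1,1), the sign extension produces (-1,0,-1,-1), VECREDUCE_ADD sums the lanes to -3, and the final negation yields 3 = ctpop(bitcast m). Sign extension (rather than zero extension) is what makes each set lane contribute exactly -1, so the reduction computes minus the population count in one step; on AArch64 this maps onto the saddlv-based sequences checked in the popcount_vmask.ll test added below.
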
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 636e31c..bf9de0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1583,7 +1583,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (!TII->isAddImmediate(*DeadMI, Reg))
continue;
LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ Register AddReg = DeadMI->getOperand(1).getReg();
DeadMI->eraseFromParent();
+ if (AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
}
@@ -1869,11 +1872,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
// Loop over the dead AVL values, and delete them now. This has
// to be outside the above loop to avoid invalidating iterators.
for (auto *MI : ToDelete) {
+ assert(MI->getOpcode() == RISCV::ADDI);
+ Register AddReg = MI->getOperand(1).getReg();
if (LIS) {
LIS->removeInterval(MI->getOperand(0).getReg());
LIS->RemoveMachineInstrFromMaps(*MI);
}
MI->eraseFromParent();
+ if (LIS && AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
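
Both deletion sites now follow the same pattern: once the dead ADDI is erased, the virtual register feeding it may have no remaining uses beyond the old instruction, so its live interval must be shrunk or the stale range can mislead later liveness queries. The shape of the fix (a fragment of the surrounding pass, not standalone code):

    Register AddReg = DeadMI->getOperand(1).getReg();
    LIS->RemoveMachineInstrFromMaps(*DeadMI);
    DeadMI->eraseFromParent();
    if (AddReg.isVirtual())
      LIS->shrinkToUses(&LIS->getInterval(AddReg));
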
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 56a38bb..b2cbdb2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -2390,6 +2390,15 @@ static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call,
return buildBindlessImageINTELInst(Call, Opcode, MIRBuilder, GR);
}
+static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ unsigned Opcode =
+ SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode;
+ return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0));
+}
+
static bool
generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
@@ -3050,6 +3059,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return generatePipeInst(Call.get(), MIRBuilder, GR);
case SPIRV::PredicatedLoadStore:
return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR);
+ case SPIRV::BlockingPipes:
+ return generateBlockingPipesInst(Call.get(), MIRBuilder, GR);
}
return false;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index c259cce..492a98e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup;
def Block2DLoadStore : BuiltinGroup;
def Pipe : BuiltinGroup;
def PredicatedLoadStore : BuiltinGroup;
+def BlockingPipes : BuiltinGroup;
//===----------------------------------------------------------------------===//
// Class defining a demangled builtin record. The information in the record
@@ -1174,6 +1175,10 @@ defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0
defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+
+// SPV_ALTERA_blocking_pipes
+defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpWritePipeBlockingALTERA>;
+defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>;
defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 43b2869..f681b0d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -159,7 +159,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
{"SPV_KHR_maximal_reconvergence",
SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence},
{"SPV_INTEL_kernel_attributes",
- SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}};
+ SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes},
+ {"SPV_ALTERA_blocking_pipes",
+ SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index a61351e..03bd61b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -993,3 +993,9 @@ def OpPredicatedLoadINTEL: Op<6528, (outs ID:$res), (ins TYPE:$resType, ID:$ptr,
"$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">;
def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops),
"OpPredicatedStoreINTEL $ptr $object $predicate">;
+
+// SPV_ALTERA_blocking_pipes
+def OpReadPipeBlockingALTERA : Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment),
+                  "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">;
+def OpWritePipeBlockingALTERA : Op<5947, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment),
+                  "OpWritePipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">;
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index e5ac76c4..af76016 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1885,6 +1885,13 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(
SPIRV::Capability::CooperativeMatrixCheckedInstructionsINTEL);
break;
+ case SPIRV::OpReadPipeBlockingALTERA:
+ case SPIRV::OpWritePipeBlockingALTERA:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes);
+ Reqs.addCapability(SPIRV::Capability::BlockingPipesALTERA);
+ }
+ break;
case SPIRV::OpCooperativeMatrixGetElementCoordINTEL:
if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_joint_matrix))
report_fatal_error("OpCooperativeMatrixGetElementCoordINTEL requires the "
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 4e4e6fb..be88f33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -56,6 +56,13 @@ public:
}
};
+static cl::list<std::string> SPVAllowUnknownIntrinsics(
+ "spv-allow-unknown-intrinsics", cl::CommaSeparated,
+ cl::desc("Emit unknown intrinsics as calls to external functions. A "
+ "comma-separated input list of intrinsic prefixes must be "
+ "provided, and only intrinsics carrying a listed prefix get "
+ "emitted as described."),
+ cl::value_desc("intrinsic_prefix_0,intrinsic_prefix_1"), cl::ValueOptional);
} // namespace
char SPIRVPrepareFunctions::ID = 0;
@@ -445,6 +452,15 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
EraseFromParent);
Changed = true;
break;
+ default:
+ if (TM.getTargetTriple().getVendor() == Triple::AMD ||
+ any_of(SPVAllowUnknownIntrinsics, [II](auto &&Prefix) {
+ if (Prefix.empty())
+ return false;
+ return II->getCalledFunction()->getName().starts_with(Prefix);
+ }))
+ Changed |= lowerIntrinsicToFunction(II);
+ break;
}
}
}
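
With this option, an unknown intrinsic is emitted as a call to an external function when its name starts with one of the listed prefixes (for Triple::AMD vendor triples this happens unconditionally). A hypothetical invocation, with made-up prefix values:

    llc -O0 -mtriple=spirv64-unknown-unknown \
        --spv-allow-unknown-intrinsics=llvm.mycustom.,llvm.vendor. in.ll -o out.spvt
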
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 1b4b29b..65a8885 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -309,7 +309,7 @@ defm SPV_KHR_shader_clock : ExtensionOperand<54, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_unstructured_loop_controls : ExtensionOperand<55, [EnvOpenCL]>;
defm SPV_EXT_demote_to_helper_invocation : ExtensionOperand<56, [EnvVulkan]>;
defm SPV_INTEL_fpga_reg : ExtensionOperand<57, [EnvOpenCL]>;
-defm SPV_INTEL_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>;
+defm SPV_ALTERA_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>;
defm SPV_GOOGLE_user_type : ExtensionOperand<59, [EnvVulkan]>;
defm SPV_KHR_physical_storage_buffer : ExtensionOperand<60, [EnvVulkan]>;
defm SPV_INTEL_kernel_attributes : ExtensionOperand<61, [EnvOpenCL]>;
@@ -611,6 +611,7 @@ defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tenso
defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>;
defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>;
defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>;
+defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d44227b3..168e041 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53442,7 +53442,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
}
SDValue NewStore =
- DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ DAG.getStore(St->getChain(), DL, Res, NewPtr,
+ MachinePointerInfo(St->getPointerInfo().getAddrSpace()),
Align(), St->getMemOperand()->getFlags());
// If there are other uses of StoredVal, replace with a new load of the
@@ -54639,7 +54640,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewPtr = DAG.getMemBasePlusOffset(
Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
SDValue NewLoad =
- DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
+ DAG.getLoad(VT, DL, Ld->getChain(), NewPtr,
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()),
Align(), Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
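
In both hunks the narrowed access goes through NewPtr, which is offset from the original pointer, so reusing the original MachinePointerInfo would attach a stale base value and offset to the new memory operand. Keeping only the address space is conservatively correct, along the lines of:

    // Fragment: pointer info that preserves the address space but drops the
    // no-longer-accurate base value and offset.
    MachinePointerInfo MPI(Ld->getPointerInfo().getAddrSpace());
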
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8670822..3062e1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1163,10 +1163,10 @@ public:
bool opcodeMayReadOrWriteFromMemory() const;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
+ bool usesFirstLaneOnly(const VPValue *Op) const override;
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override;
+ bool usesFirstPartOnly(const VPValue *Op) const override;
/// Returns true if this VPInstruction produces a scalar value from a vector,
/// e.g. by performing a reduction or extracting a lane.
@@ -1393,13 +1393,13 @@ public:
return true;
}
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -1628,7 +1628,7 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
+ bool usesFirstLaneOnly(const VPValue *Op) const override;
};
/// A recipe for widening Call instructions using library calls.
@@ -1767,7 +1767,7 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getCond() && isInvariantCond();
@@ -1833,7 +1833,7 @@ public:
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (Op == getOperand(0))
@@ -1870,7 +1870,7 @@ public:
void execute(VPTransformState &State) override;
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -1884,7 +1884,7 @@ public:
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
@@ -1922,14 +1922,14 @@ public:
Type *getSourceElementType() const { return SourceElementTy; }
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
@@ -2110,7 +2110,7 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// The recipe creates its own wide start value, so it only requests the
@@ -2325,7 +2325,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getStartValue();
@@ -2399,7 +2399,7 @@ public:
bool isInLoop() const { return IsInLoop; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return isOrdered() || isInLoop();
@@ -2468,13 +2468,13 @@ public:
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Recursing through Blend recipes only, must terminate at header phi's the
// latest.
return all_of(users(),
- [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
+ [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
}
};
@@ -2562,7 +2562,7 @@ public:
VPCostContext &Ctx) const override;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
+ bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
@@ -2608,7 +2608,7 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
@@ -2656,7 +2656,7 @@ public:
#endif
/// The recipe only uses the first lane of the address, and EVL operand.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
@@ -2862,7 +2862,7 @@ public:
VPValue *getEVL() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getEVL();
@@ -2924,7 +2924,7 @@ public:
bool isPredicated() const { return IsPredicated; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return isSingleScalar();
@@ -3212,9 +3212,8 @@ protected:
Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- assert(isa<VPVectorEndPointerRecipe>(getAddr()) ||
- !Reverse &&
- "Reversed acccess without VPVectorEndPointerRecipe address?");
+    assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+           "Reversed access without VPVectorEndPointerRecipe address?");
}
public:
@@ -3300,7 +3299,7 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive loads operations only demand the first lane of
@@ -3341,7 +3340,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened loads only demand the first lane of EVL and consecutive loads
@@ -3382,7 +3381,7 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive stores only demand the first lane of their address,
@@ -3425,7 +3424,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (Op == getEVL()) {
@@ -3509,14 +3508,14 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3591,7 +3590,7 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3701,7 +3700,7 @@ public:
VPValue *getStepValue() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3766,7 +3765,7 @@ public:
VPValue *getStepValue() const { return getOperand(1); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f792d0a..80cd112 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1276,7 +1276,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
}
}
-bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
+bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return vputils::onlyFirstLaneUsed(this);
@@ -1325,7 +1325,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
llvm_unreachable("switch should return");
}
-bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
+bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()))
return vputils::onlyFirstPartUsed(this);
@@ -1692,7 +1692,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
if (!VFTy->getParamType(I.index())->isVectorTy())
Arg = State.get(I.value(), VPLane(0));
else
- Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
+ Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
Args.push_back(Arg);
}
@@ -1761,7 +1761,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
State.TTI))
Arg = State.get(I.value(), VPLane(0));
else
- Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
+ Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
State.TTI))
TysForDecl.push_back(Arg->getType());
@@ -1843,7 +1843,7 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
return Intrinsic::getBaseName(VectorIntrinsicID);
}
-bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
+bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
return all_of(enumerate(operands()), [this, &Op](const auto &X) {
auto [Idx, V] = X;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 82bf79e..48bd697 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -204,7 +204,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
});
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
- return !U->onlyFirstLaneUsed(SinkCandidate);
+ return !U->usesFirstLaneOnly(SinkCandidate);
}))
continue;
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index d6a0028..d4b8b72b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -582,7 +582,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
/// Users that only demand the first lane can use the definition for lane
/// 0.
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
- return U.onlyFirstLaneUsed(DefR);
+ return U.usesFirstLaneOnly(DefR);
});
// Update each build vector user that currently has DefR as its only
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index c6380d3..e22c5df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -18,12 +18,12 @@ using namespace llvm::VPlanPatternMatch;
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
return all_of(Def->users(),
- [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Def); });
+ [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}
bool vputils::onlyFirstPartUsed(const VPValue *Def) {
return all_of(Def->users(),
- [Def](const VPUser *U) { return U->onlyFirstPartUsed(Def); });
+ [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}
bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 83e3fca..5da7463 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -274,12 +274,12 @@ public:
virtual bool usesScalars(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
- return onlyFirstLaneUsed(Op);
+ return usesFirstLaneOnly(Op);
}
/// Returns true if the VPUser only uses the first lane of operand \p Op.
/// Conservatively returns false.
- virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ virtual bool usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;
@@ -287,7 +287,7 @@ public:
/// Returns true if the VPUser only uses the first part of operand \p Op.
/// Conservatively returns false.
- virtual bool onlyFirstPartUsed(const VPValue *Op) const {
+ virtual bool usesFirstPartOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;
diff --git a/llvm/test/CodeGen/AArch64/popcount_vmask.ll b/llvm/test/CodeGen/AArch64/popcount_vmask.ll
new file mode 100644
index 0000000..e784ead
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/popcount_vmask.ll
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @vmask_popcount_i32_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i8> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <16 x i8> %a, %b
+ %t1 = bitcast <16 x i1> %mask to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i16> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i16> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i32> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i32> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %mask = icmp slt <1 x i64> %a, %b
+ %t1 = bitcast <1 x i1> %mask to i1
+ %t2 = call i1 @llvm.ctpop(i1 %t1)
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i64> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i32
+ ret i32 %t3
+}
+
+define i64 @vmask_popcount_i64_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i8> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <16 x i8> %a, %b
+ %t1 = bitcast <16 x i1> %mask to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i16> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i16> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i32> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i32> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %mask = icmp slt <1 x i64> %a, %b
+ %t1 = bitcast <1 x i1> %mask to i1
+ %t2 = call i1 @llvm.ctpop(i1 %t1)
+ %t3 = zext i1 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i64> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i64
+ ret i64 %t3
+}
+
+define i32 @non_vmask_popcount_1(half %a) {
+; CHECK-LABEL: non_vmask_popcount_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %t1 = bitcast half %a to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @non_vmask_popcount_2(<8 x i16> %a) {
+; CHECK-LABEL: non_vmask_popcount_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: umov w9, v0.b[1]
+; CHECK-NEXT: umov w10, v0.b[2]
+; CHECK-NEXT: and w8, w8, #0x3
+; CHECK-NEXT: bfi w8, w9, #2, #2
+; CHECK-NEXT: umov w9, v0.b[3]
+; CHECK-NEXT: bfi w8, w10, #4, #2
+; CHECK-NEXT: umov w10, v0.b[4]
+; CHECK-NEXT: bfi w8, w9, #6, #2
+; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: bfi w8, w10, #8, #2
+; CHECK-NEXT: umov w10, v0.b[6]
+; CHECK-NEXT: bfi w8, w9, #10, #2
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: bfi w8, w10, #12, #2
+; CHECK-NEXT: orr w8, w8, w9, lsl #14
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %mask = trunc <8 x i16> %a to <8 x i2>
+ %t1 = bitcast <8 x i2> %mask to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i32
+ ret i32 %t3
+}
diff --git a/llvm/test/CodeGen/AArch64/vector-minmax.ll b/llvm/test/CodeGen/AArch64/vector-minmax.ll
new file mode 100644
index 0000000..6696f94
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vector-minmax.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon,+sve | FileCheck %s --check-prefix=CHECK-SVE
+
+define <2 x i64> @smax_v2i64(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: smax_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @smin_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smin_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @umax_v2i64(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: umax_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: umax_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @umin_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: umin_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <1 x i64> @smax_v1i64(<1 x i64> %a, <1 x i64> %b){
+; CHECK-LABEL: smax_v1i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt d2, d0, d1
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v1i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl1
+; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
+ ret <1 x i64> %0
+}
+
+; This is legal for Neon, so this should use the Neon smax.
+define <4 x i32> @smax_v4i32(<4 x i32> %a, <4 x i32> %b){
+; CHECK-LABEL: smax_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 20034b6..b6e29cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -863,3 +863,19 @@ entry:
i64 2)
ret <vscale x 1 x double> %2
}
+
+; The two vsetvlis will be coalesced so the add will be made dead and
+; removed. Make sure we shrink the live interval of %x.
+define void @non_li_addi(i64 %x, ptr %p) {
+; CHECK-LABEL: non_li_addi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: ret
+entry:
+ %add = add i64 %x, 1
+ %0 = tail call i64 @llvm.riscv.vsetvli(i64 %add, i64 3, i64 0)
+ %1 = call <vscale x 8 x i8> @llvm.riscv.vle(<vscale x 8 x i8> poison, ptr %p, i64 %0)
+ %2 = tail call i64 @llvm.riscv.vsetvli(i64 1, i64 3, i64 0)
+ %3 = tail call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff(<vscale x 8 x i8> poison, ptr %p, i64 %2)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index fdd30c9..f9929c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -104,6 +104,10 @@
ret void
}
+ define void @non_li_addi() {
+ ret void
+ }
+
declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, ptr nocapture, i64) #4
@@ -664,3 +668,23 @@ body: |
bb.2:
$x10 = COPY %vl
PseudoRET implicit killed $x10
+...
+---
+# The two vsetvlis will be coalesced so the ADDI will be made dead and removed.
+# Make sure we shrink the live interval of %0.
+name: non_li_addi
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: non_li_addi
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoVSETIVLI:%[0-9]+]]:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gprnox0 = ADDI %0, 1
+ %2:gprnox0 = PseudoVSETVLI %1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ %3:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ PseudoRET
diff --git a/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll b/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll
new file mode 100644
index 0000000..677291a
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll
@@ -0,0 +1,36 @@
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics %s -o %t.spvt 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=notllvm %s -o %t.spvt 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm.some.custom %s -o %t.spvt 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm. %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm.,random.prefix %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm. %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %}
+
+; This test checks the command-line option that allows representing unknown
+; intrinsics as external function calls in SPIR-V.
+
+; CHECK-ERROR: LLVM ERROR: unable to legalize instruction: %3:iid(s64) = G_READCYCLECOUNTER (in function: foo)
+
+; CHECK: Name %[[READCYCLECOUNTER:[0-9]+]] "spirv.llvm_readcyclecounter"
+; CHECK: Name %[[SOME_CUSTOM_INTRINSIC:[0-9]+]] "spirv.llvm_some_custom_intrinsic"
+; CHECK-DAG: Decorate %[[READCYCLECOUNTER]] LinkageAttributes {{.*}} Import
+; CHECK: Decorate %[[SOME_CUSTOM_INTRINSIC]] LinkageAttributes {{.*}} Import
+; CHECK-DAG: %[[I64:[0-9]+]] = OpTypeInt 64
+; CHECK: %[[FnTy:[0-9]+]] = OpTypeFunction %[[I64]]
+; CHECK: %[[READCYCLECOUNTER]] = OpFunction %[[I64]] {{.*}} %[[FnTy]]
+; CHECK-DAG: %[[SOME_CUSTOM_INTRINSIC]] = OpFunction %[[I64]] {{.*}} %[[FnTy]]
+; CHECK-DAG: OpFunctionCall %[[I64]] %[[READCYCLECOUNTER]]
+; CHECK: OpFunctionCall %[[I64]] %[[SOME_CUSTOM_INTRINSIC]]
+
+define spir_func void @foo() {
+entry:
+; TODO: if and when the SPIR-V backend learns how to lower readcyclecounter, we will have to pick another unhandled intrinsic
+ %0 = call i64 @llvm.readcyclecounter()
+ %1 = call i64 @llvm.some.custom.intrinsic()
+ ret void
+}
+
+declare i64 @llvm.readcyclecounter()
+declare i64 @llvm.some.custom.intrinsic()
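The CHECK lines above suggest the mangling applied when an unknown intrinsic is emitted as an external SPIR-V function: the dots in the intrinsic name become underscores under a "spirv." prefix. A small Python sketch of that apparent scheme (the helper name is hypothetical, inferred from the test's CHECK lines rather than from the backend source):

def spirv_external_name(intrinsic: str) -> str:
    # Dots are not valid in the emitted symbol name, so they become underscores.
    return "spirv." + intrinsic.replace(".", "_")

assert spirv_external_name("llvm.readcyclecounter") == "spirv.llvm_readcyclecounter"
assert spirv_external_name("llvm.some.custom.intrinsic") == "spirv.llvm_some_custom_intrinsic"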
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll
new file mode 100644
index 0000000..f6b6115
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll
@@ -0,0 +1,98 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_blocking_pipes %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV
+; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_blocking_pipes %s -o - -filetype=obj | spirv-val %}
+
+%opencl.pipe_ro_t = type opaque
+%opencl.pipe_wo_t = type opaque
+
+; CHECK-SPIRV: OpCapability BlockingPipesALTERA
+; CHECK-SPIRV: OpExtension "SPV_ALTERA_blocking_pipes"
+; CHECK-SPIRV: %[[PipeRTy:[0-9]+]] = OpTypePipe ReadOnly
+; CHECK-SPIRV: %[[PipeWTy:[0-9]+]] = OpTypePipe WriteOnly
+; CHECK-SPIRV: %[[PipeR1:[0-9]+]] = OpLoad %[[PipeRTy]] %[[#]] Aligned 8
+; CHECK-SPIRV: OpReadPipeBlockingALTERA %[[PipeR1]] %[[#]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[PipeR2:[0-9]+]] = OpLoad %[[PipeRTy]] %[[#]] Aligned 8
+; CHECK-SPIRV: OpReadPipeBlockingALTERA %[[PipeR2]] %[[#]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[PipeW1:[0-9]+]] = OpLoad %[[PipeWTy]] %[[#]] Aligned 8
+; CHECK-SPIRV: OpWritePipeBlockingALTERA %[[PipeW1]] %[[#]] %[[#]] %[[#]]
+; CHECK-SPIRV: %[[PipeW2:[0-9]+]] = OpLoad %[[PipeWTy]] %[[#]] Aligned 8
+; CHECK-SPIRV: OpWritePipeBlockingALTERA %[[PipeW2]] %[[#]] %[[#]] %[[#]]
+
+define spir_func void @foo(target("spirv.Pipe", 0) %p, ptr addrspace(1) %ptr) {
+entry:
+ %p.addr = alloca target("spirv.Pipe", 0), align 8
+ %ptr.addr = alloca ptr addrspace(1), align 8
+ store target("spirv.Pipe", 0) %p, target("spirv.Pipe", 0)* %p.addr, align 8
+ store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8
+ %0 = load target("spirv.Pipe", 0), target("spirv.Pipe", 0)* %p.addr, align 8
+ %1 = load ptr addrspace(1), ptr %ptr.addr, align 8
+ %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4)
+ call spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePiii(target("spirv.Pipe", 0) %0, ptr addrspace(4) %2, i32 4, i32 4)
+ ret void
+}
+
+declare dso_local spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePiii(target("spirv.Pipe", 0), ptr addrspace(4), i32, i32)
+
+define spir_func void @bar(target("spirv.Pipe", 0) %p, ptr addrspace(1) %ptr) {
+entry:
+ %p.addr = alloca target("spirv.Pipe", 0), align 8
+ %ptr.addr = alloca ptr addrspace(1), align 8
+ store target("spirv.Pipe", 0) %p, target("spirv.Pipe", 0)* %p.addr, align 8
+ store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8
+ %0 = load target("spirv.Pipe", 0), target("spirv.Pipe", 0)* %p.addr, align 8
+ %1 = load ptr addrspace(1), ptr %ptr.addr, align 8
+ %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4)
+ call spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePvii(target("spirv.Pipe", 0) %0, ptr addrspace(4) %2, i32 4, i32 4)
+ ret void
+}
+
+declare dso_local spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePvii(target("spirv.Pipe", 0), ptr addrspace(4), i32, i32)
+
+define spir_func void @boo(target("spirv.Pipe", 1) %p, ptr addrspace(1) %ptr) {
+entry:
+ %p.addr = alloca target("spirv.Pipe", 1), align 8
+ %ptr.addr = alloca ptr addrspace(1), align 8
+ store target("spirv.Pipe", 1) %p, target("spirv.Pipe", 1)* %p.addr, align 8
+ store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8
+ %0 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1)* %p.addr, align 8
+ %1 = load ptr addrspace(1), ptr %ptr.addr, align 8
+ %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4)
+ call spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePiii(target("spirv.Pipe", 1) %0, ptr addrspace(4) %2, i32 4, i32 4)
+ ret void
+}
+
+declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePiii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32)
+
+define spir_func void @baz(target("spirv.Pipe", 1) %p, ptr addrspace(1) %ptr) {
+entry:
+ %p.addr = alloca target("spirv.Pipe", 1), align 8
+ %ptr.addr = alloca ptr addrspace(1), align 8
+ store target("spirv.Pipe", 1) %p, target("spirv.Pipe", 1)* %p.addr, align 8
+ store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8
+ %0 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1)* %p.addr, align 8
+ %1 = load ptr addrspace(1), ptr %ptr.addr, align 8
+ %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4)
+ call spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePvii(target("spirv.Pipe", 1) %0, ptr addrspace(4) %2, i32 4, i32 4)
+ ret void
+}
+
+declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePvii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32)
+
+; CHECK-LLVM: declare spir_func void @__read_pipe_2_bl(ptr addrspace(1), ptr addrspace(4), i32, i32)
+; CHECK-LLVM: declare spir_func void @__write_pipe_2_bl(ptr addrspace(1), ptr addrspace(4), i32, i32)
+
+define linkonce_odr dso_local spir_func void @WritePipeBLockingi9Pointer(ptr addrspace(4) align 2 dereferenceable(2) %_Data) {
+entry:
+ %_Data.addr = alloca ptr addrspace(4), align 8
+ %_WPipe = alloca target("spirv.Pipe", 1), align 8
+ %_Data.addr.ascast = addrspacecast ptr %_Data.addr to ptr addrspace(4)
+ %_WPipe.ascast = addrspacecast target("spirv.Pipe", 1)* %_WPipe to target("spirv.Pipe", 1) addrspace(4)*
+ store ptr addrspace(4) %_Data, ptr addrspace(4) %_Data.addr.ascast, align 8
+ %0 = bitcast target("spirv.Pipe", 1)* %_WPipe to ptr
+ %1 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1) addrspace(4)* %_WPipe.ascast, align 8
+ %2 = load ptr addrspace(4), ptr addrspace(4) %_Data.addr.ascast, align 8
+ call spir_func void @_Z30__spirv_WritePipeBlockingINTELIDU9_Ev8ocl_pipePKT_ii(target("spirv.Pipe", 1) %1, ptr addrspace(4) %2, i32 2, i32 2)
+ ret void
+}
+
+declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIDU9_Ev8ocl_pipePKT_ii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32)
+ 
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/pr166744.ll b/llvm/test/CodeGen/X86/pr166744.ll
new file mode 100644
index 0000000..21b25d8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr166744.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=POSTRA
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=haswell | FileCheck %s --check-prefixes=NOPOSTRA
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=NOPOSTRA
+
+; Ensure reloads are after narrowed i512 -> i32 store
+define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) {
+; POSTRA-LABEL: PR166744:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: movl $1029, %eax # imm = 0x405
+; POSTRA-NEXT: shlxl %esi, %edx, %edx
+; POSTRA-NEXT: bextrl %eax, %esi, %eax
+; POSTRA-NEXT: movl (%rdi,%rax,4), %ecx
+; POSTRA-NEXT: btrl %esi, %ecx
+; POSTRA-NEXT: orl %ecx, %edx
+; POSTRA-NEXT: movl %edx, (%rdi,%rax,4)
+; POSTRA-NEXT: movq 16(%rdi), %rax
+; POSTRA-NEXT: movq (%rdi), %rcx
+; POSTRA-NEXT: movq 24(%rdi), %rdx
+; POSTRA-NEXT: movq 8(%rdi), %rsi
+; POSTRA-NEXT: orq 56(%rdi), %rdx
+; POSTRA-NEXT: orq 40(%rdi), %rsi
+; POSTRA-NEXT: orq 48(%rdi), %rax
+; POSTRA-NEXT: orq 32(%rdi), %rcx
+; POSTRA-NEXT: orq %rdx, %rsi
+; POSTRA-NEXT: orq %rax, %rcx
+; POSTRA-NEXT: orq %rsi, %rcx
+; POSTRA-NEXT: setne %al
+; POSTRA-NEXT: retq
+;
+; NOPOSTRA-LABEL: PR166744:
+; NOPOSTRA: # %bb.0:
+; NOPOSTRA-NEXT: movl %esi, %eax
+; NOPOSTRA-NEXT: shrl $3, %eax
+; NOPOSTRA-NEXT: andl $60, %eax
+; NOPOSTRA-NEXT: movl (%rdi,%rax), %ecx
+; NOPOSTRA-NEXT: btrl %esi, %ecx
+; NOPOSTRA-NEXT: shlxl %esi, %edx, %edx
+; NOPOSTRA-NEXT: orl %ecx, %edx
+; NOPOSTRA-NEXT: movl %edx, (%rdi,%rax)
+; NOPOSTRA-NEXT: movq 16(%rdi), %rax
+; NOPOSTRA-NEXT: movq (%rdi), %rcx
+; NOPOSTRA-NEXT: movq 8(%rdi), %rdx
+; NOPOSTRA-NEXT: movq 24(%rdi), %rsi
+; NOPOSTRA-NEXT: orq 56(%rdi), %rsi
+; NOPOSTRA-NEXT: orq 40(%rdi), %rdx
+; NOPOSTRA-NEXT: orq 48(%rdi), %rax
+; NOPOSTRA-NEXT: orq 32(%rdi), %rcx
+; NOPOSTRA-NEXT: orq %rsi, %rdx
+; NOPOSTRA-NEXT: orq %rax, %rcx
+; NOPOSTRA-NEXT: orq %rdx, %rcx
+; NOPOSTRA-NEXT: setne %al
+; NOPOSTRA-NEXT: retq
+ %rem = and i64 %idx, 511
+ %sh_prom = zext nneg i64 %rem to i512
+ %shl = shl nuw i512 1, %sh_prom
+ %not = xor i512 %shl, -1
+ %load = load i512, ptr %v, align 8
+ %and = and i512 %load, %not
+ %conv2 = zext i1 %b to i512
+ %shl4 = shl nuw i512 %conv2, %sh_prom
+ %or = or i512 %and, %shl4
+ store i512 %or, ptr %v, align 8
+ %cmp = icmp ne i512 %or, 0
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll
new file mode 100644
index 0000000..921bcf0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll
@@ -0,0 +1,11 @@
+; RUN: opt -passes=vector-combine %s -S -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+;CHECK-LABEL: @interleave2_same_const_splat_nxv4i16(
+;CHECK: call <vscale x 4 x i16> @llvm.vector.interleave2
+;CHECK: ret <vscale x 4 x i16> %retval
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll
new file mode 100644
index 0000000..2926371
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM
+; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR
+
+define i64 @test1(i64 %i) nounwind readnone {
+ %loc = alloca i64
+ %j = load i64, ptr %loc
+ %r = add i64 %i, %j
+ ret i64 %r
+}
+
+define i64 @test2(i32 %i) nounwind readnone {
+ %loc = alloca i32
+ %j = load i32, ptr %loc
+ %r = add i32 %i, %j
+ %ext = zext i32 %r to i64
+ ret i64 %ext
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected
new file mode 100644
index 0000000..88cb03e
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM
+; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR
+
+define i64 @test1(i64 %i) nounwind readnone {
+; ASM-LABEL: test1:
+; ASM: # %bb.0:
+; ASM-NEXT: movq %rdi, %rax
+; ASM-NEXT: addq -{{[0-9]+}}(%rsp), %rax
+; ASM-NEXT: retq
+; MIR-LABEL: name: test1
+; MIR: bb.0 (%ir-block.0):
+; MIR-NEXT: liveins: $rdi
+; MIR-NEXT: {{ $}}
+; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+; MIR-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s64) from %ir.loc)
+; MIR-NEXT: $rax = COPY [[ADD64rm]]
+; MIR-NEXT: RET 0, $rax
+ %loc = alloca i64
+ %j = load i64, ptr %loc
+ %r = add i64 %i, %j
+ ret i64 %r
+}
+
+define i64 @test2(i32 %i) nounwind readnone {
+; ASM-LABEL: test2:
+; ASM: # %bb.0:
+; ASM-NEXT: movl %edi, %eax
+; ASM-NEXT: addl -{{[0-9]+}}(%rsp), %eax
+; ASM-NEXT: retq
+; MIR-LABEL: name: test2
+; MIR: bb.0 (%ir-block.0):
+; MIR-NEXT: liveins: $edi
+; MIR-NEXT: {{ $}}
+; MIR-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi
+; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s32) from %ir.loc)
+; MIR-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[ADD32rm]], %subreg.sub_32bit
+; MIR-NEXT: $rax = COPY [[SUBREG_TO_REG]]
+; MIR-NEXT: RET 0, $rax
+ %loc = alloca i32
+ %j = load i32, ptr %loc
+ %r = add i32 %i, %j
+ %ext = zext i32 %r to i64
+ ret i64 %ext
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll
new file mode 100644
index 0000000..7167bcf
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK
+
+define i32 @add(i32 %a, i32 %b) {
+ %sum = add i32 %a, %b
+ ret i32 %sum
+}
+
+define i32 @sub(i32 %a, i32 %b) {
+ %diff = sub i32 %a, %b
+ ret i32 %diff
+}
+
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected
new file mode 100644
index 0000000..1ba920d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK
+
+define i32 @add(i32 %a, i32 %b) {
+ %sum = add i32 %a, %b
+ ret i32 %sum
+}
+
+define i32 @sub(i32 %a, i32 %b) {
+ %diff = sub i32 %a, %b
+ ret i32 %diff
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test
new file mode 100644
index 0000000..6fc57b5
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test
@@ -0,0 +1,9 @@
+# REQUIRES: x86-registered-target
+## Test that update_llc_test_checks.py can generate both ASM and MIR checks in the same file
+
+# RUN: cp -f %S/Inputs/x86_asm_mir_mixed.ll %t.ll && %update_llc_test_checks %t.ll
+# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll
+
+## Verify that running the script again on an already updated file doesn't add duplicate checks
+# RUN: %update_llc_test_checks %t.ll
+# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test
new file mode 100644
index 0000000..0f8aaa54
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test
@@ -0,0 +1,8 @@
+# REQUIRES: x86-registered-target
+## Test that using the same prefix for both ASM and MIR outputs generates a warning
+## and doesn't produce any checks.
+
+# RUN: cp -f %S/Inputs/x86_asm_mir_same_prefix.ll %t.ll && %update_llc_test_checks %t.ll 2>&1 | FileCheck %s --check-prefix=WARNING
+# RUN: diff -u %S/Inputs/x86_asm_mir_same_prefix.ll.expected %t.ll
+
+# WARNING: WARNING: The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: CHECK
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 2dad16a..baa0377 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -605,6 +605,7 @@ TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
+STOP_PASS_RE = re.compile(r"-stop-(before|after)=(\w+)")
IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_")
IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\S+")
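For orientation, a minimal sketch of how the new STOP_PASS_RE is meant to be used: a RUN line that stops before or after a pass produces MIR rather than assembly. The helper below is illustrative only and not part of common.py:

import re

STOP_PASS_RE = re.compile(r"-stop-(before|after)=(\w+)")

def produces_mir(llc_cmd: str) -> bool:
    # Mirrors the classification added in update_llc_test_checks.py below.
    return STOP_PASS_RE.search(llc_cmd) is not None

assert produces_mir("llc -mtriple=x86_64 -stop-after=finalize-isel < %s")
assert not produces_mir("llc -mtriple=x86_64 < %s")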
diff --git a/llvm/utils/UpdateTestChecks/mir.py b/llvm/utils/UpdateTestChecks/mir.py
index 24bb8b3..01ee0e1 100644
--- a/llvm/utils/UpdateTestChecks/mir.py
+++ b/llvm/utils/UpdateTestChecks/mir.py
@@ -163,13 +163,15 @@ def add_mir_checks_for_function(
print_fixed_stack,
first_check_is_next,
at_the_function_name,
+ check_indent=None,
):
printed_prefixes = set()
for run in run_list:
for prefix in run[0]:
if prefix in printed_prefixes:
break
- if not func_dict[prefix][func_name]:
+ # func_info can be empty if there was a prefix conflict.
+ if not func_dict[prefix].get(func_name):
continue
if printed_prefixes:
# Add some space between different check prefixes.
@@ -185,6 +187,7 @@ def add_mir_checks_for_function(
func_dict[prefix][func_name],
print_fixed_stack,
first_check_is_next,
+ check_indent,
)
break
else:
@@ -204,6 +207,7 @@ def add_mir_check_lines(
func_info,
print_fixed_stack,
first_check_is_next,
+ check_indent=None,
):
func_body = str(func_info).splitlines()
if single_bb:
@@ -220,7 +224,10 @@ def add_mir_check_lines(
first_line = func_body[0]
indent = len(first_line) - len(first_line.lstrip(" "))
# A check comment, indented the appropriate amount
- check = "{:>{}}; {}".format("", indent, prefix)
+ if check_indent is not None:
+ check = "{}; {}".format(check_indent, prefix)
+ else:
+ check = "{:>{}}; {}".format("", indent, prefix)
output_lines.append("{}-LABEL: name: {}".format(check, func_name))
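A standalone illustration of the check_indent plumbing above, assuming the same format strings: with check_indent=None the label comment is indented to match the MIR function body, while an explicit value (the empty string for .ll files) is used verbatim. The helper name is hypothetical:

def format_check(prefix: str, indent: int, check_indent=None) -> str:
    # Mirrors the branch added to add_mir_check_lines above.
    if check_indent is not None:
        return "{}; {}".format(check_indent, prefix)
    return "{:>{}}; {}".format("", indent, prefix)

assert format_check("MIR", 2) == "  ; MIR"                  # MIR-file style
assert format_check("MIR", 2, check_indent="") == "; MIR"   # IR-file style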
diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py
index 8c57e75..98864be 100755
--- a/llvm/utils/update_llc_test_checks.py
+++ b/llvm/utils/update_llc_test_checks.py
@@ -15,7 +15,7 @@ import argparse
import os # Used to advertise this file's name ("autogenerated_note").
import sys
-from UpdateTestChecks import common
+from UpdateTestChecks import common, mir
# llc is the only llc-like in the LLVM tree but downstream forks can add
# additional ones here if they have them.
@@ -33,6 +33,7 @@ def update_test(ti: common.TestInfo):
break
run_list = []
+ mir_run_list = []
for l in ti.run_lines:
if "|" not in l:
common.warn("Skipping unparsable RUN line: " + l)
@@ -57,9 +58,14 @@ def update_test(ti: common.TestInfo):
if m:
march_in_cmd = m.groups()[0]
+ target_list = run_list
m = common.DEBUG_ONLY_ARG_RE.search(llc_cmd)
if m and m.groups()[0] == "isel":
from UpdateTestChecks import isel as output_type
+ elif not m and common.STOP_PASS_RE.search(llc_cmd):
+ # MIR output mode. If -debug-only is present, assume
+ # the debug output is the main point of interest.
+ target_list = mir_run_list
else:
from UpdateTestChecks import asm as output_type
@@ -84,7 +90,7 @@ def update_test(ti: common.TestInfo):
# FIXME: We should use multiple check prefixes to common check lines. For
# now, we just ignore all but the last.
- run_list.append(
+ target_list.append(
(
check_prefixes,
llc_tool,
@@ -119,14 +125,20 @@ def update_test(ti: common.TestInfo):
ginfo=ginfo,
)
- for (
- prefixes,
- llc_tool,
- llc_args,
- preprocess_cmd,
- triple_in_cmd,
- march_in_cmd,
- ) in run_list:
+ # Dictionary to store MIR function bodies separately
+ mir_func_dict = {}
+ for run_tuple, is_mir in [(run, False) for run in run_list] + [
+ (run, True) for run in mir_run_list
+ ]:
+ (
+ prefixes,
+ llc_tool,
+ llc_args,
+ preprocess_cmd,
+ triple_in_cmd,
+ march_in_cmd,
+ ) = run_tuple
+
common.debug("Extracted LLC cmd:", llc_tool, llc_args)
common.debug("Extracted FileCheck prefixes:", str(prefixes))
@@ -141,22 +153,54 @@ def update_test(ti: common.TestInfo):
if not triple:
triple = common.get_triple_from_march(march_in_cmd)
- scrubber, function_re = output_type.get_run_handler(triple)
- if 0 == builder.process_run_line(
- function_re, scrubber, raw_tool_output, prefixes
- ):
- common.warn(
- "Couldn't match any function. Possibly the wrong target triple has been provided"
+ if is_mir:
+ # MIR output mode
+ common.debug("Detected MIR output mode for prefixes:", str(prefixes))
+ for prefix in prefixes:
+ if prefix not in mir_func_dict:
+ mir_func_dict[prefix] = {}
+
+ mir.build_function_info_dictionary(
+ ti.path,
+ raw_tool_output,
+ triple,
+ prefixes,
+ mir_func_dict,
+ ti.args.verbose,
)
- builder.processed_prefixes(prefixes)
+ else:
+ # ASM output mode
+ scrubber, function_re = output_type.get_run_handler(triple)
+ if 0 == builder.process_run_line(
+ function_re, scrubber, raw_tool_output, prefixes
+ ):
+ common.warn(
+ "Couldn't match any function. Possibly the wrong target triple has been provided"
+ )
+ builder.processed_prefixes(prefixes)
func_dict = builder.finish_and_get_func_dict()
+
+ # Check for conflicts: same prefix used for both ASM and MIR
+ conflicting_prefixes = set(func_dict.keys()) & set(mir_func_dict.keys())
+ if conflicting_prefixes:
+ common.warn(
+ "The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: {}".format(
+ ", ".join(sorted(conflicting_prefixes))
+ ),
+ test_file=ti.path,
+ )
+ for prefix in conflicting_prefixes:
+ mir_func_dict[prefix] = {}
+ func_dict[prefix] = {}
+
global_vars_seen_dict = {}
is_in_function = False
is_in_function_start = False
func_name = None
prefix_set = set([prefix for p in run_list for prefix in p[0]])
+ prefix_set.update([prefix for p in mir_run_list for prefix in p[0]])
common.debug("Rewriting FileCheck prefixes:", str(prefix_set))
output_lines = []
@@ -221,6 +265,22 @@ def update_test(ti: common.TestInfo):
is_filtered=builder.is_filtered(),
)
)
+
+ # Also add MIR checks if we have them for this function
+ if mir_run_list and func_name:
+ mir.add_mir_checks_for_function(
+ ti.path,
+ output_lines,
+ mir_run_list,
+ mir_func_dict,
+ func_name,
+ single_bb=False, # Don't skip basic block labels.
+ print_fixed_stack=False, # Don't print fixed stack (ASM tests don't need it).
+ first_check_is_next=False, # First check is LABEL, not NEXT.
+ at_the_function_name=False, # Use "name:" not "@name".
+ check_indent="", # No indentation for IR files (not MIR files).
+ )
+
is_in_function_start = False
if is_in_function:
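The conflict handling added above boils down to a set intersection between the ASM and MIR prefix dictionaries; conflicting prefixes are emptied so neither kind of check is emitted. A hedged, self-contained sketch of that logic (names are illustrative):

def find_conflicts(asm_func_dict: dict, mir_func_dict: dict) -> list:
    # A prefix used by both an ASM and a MIR RUN line would receive two
    # incompatible sets of checks, so it is reported and then dropped.
    return sorted(set(asm_func_dict) & set(mir_func_dict))

assert find_conflicts({"CHECK": {}, "ASM": {}}, {"CHECK": {}, "MIR": {}}) == ["CHECK"]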
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 8728e66..70d424b 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -21,13 +21,6 @@ include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/IR/OpBase.td"
include "mlir/IR/RegionKindInterface.td"
-// This is roughly similar to OpFoldResult assuming the handle produces a single
-// value in the payload IR.
-def TransformAnyParamTypeOrAnyHandle : Type<
- Or<[TransformHandleTypeInterface.predicate,
- TransformParamTypeInterface.predicate]>,
- "transform any param type or any handle type">;
-
//===----------------------------------------------------------------------===//
// Apply...PatternsOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 48978eb..de07f50 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -33,22 +33,14 @@ namespace linalg {
//===----------------------------------------------------------------------===//
// Utilities for inferring various semantics properties of Linalg ops.
//===----------------------------------------------------------------------===//
-/// Shell function to compute the Destination Permutation of PackOp
-/// This function uses the helper function `computePackUnPackPerm` to get
-/// the permutation vector. Only major difference between UnPack and Pack is
-/// that packOp uses destination rank whereas unpack Uses source rank.
-SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp);
-
-/// Shell function to compute the Source Permutation of unPackOp.
-/// This function, like the getPackInverseDestPerm uses the helper function
-/// computePackUnPackPerm` to get the permutation vector.
-/// Only major difference between UnPack and Pack is that packOp uses
-/// destination rank whereas unpack Uses source rank.
-SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp);
-
-/// Shell function to compute the Source rank permutation for unpackOp
-/// Unpack requires some packing metadata data information, so created
-/// another function where this value is passed by reference.
+
+/// Compute the inverse permutation for the destination tensor (i.e. in the
+/// packed domain).
+SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp,
+ PackingMetadata &metadata);
+
+/// Compute the inverse permutation for the source tensor (i.e. in the packed
+/// domain).
SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp,
PackingMetadata &metadata);
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 0e42d08..b628f1a 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -395,7 +395,7 @@ def SPV_INTEL_fpga_buffer_location : I32EnumAttrCase<"SPV_INTEL_fp
def SPV_INTEL_arbitrary_precision_fixed_point : I32EnumAttrCase<"SPV_INTEL_arbitrary_precision_fixed_point", 4019>;
def SPV_INTEL_usm_storage_classes : I32EnumAttrCase<"SPV_INTEL_usm_storage_classes", 4020>;
def SPV_INTEL_io_pipes : I32EnumAttrCase<"SPV_INTEL_io_pipes", 4021>;
-def SPV_INTEL_blocking_pipes : I32EnumAttrCase<"SPV_INTEL_blocking_pipes", 4022>;
+def SPV_ALTERA_blocking_pipes : I32EnumAttrCase<"SPV_ALTERA_blocking_pipes", 4022>;
def SPV_INTEL_fpga_reg : I32EnumAttrCase<"SPV_INTEL_fpga_reg", 4023>;
def SPV_INTEL_long_constant_composite : I32EnumAttrCase<"SPV_INTEL_long_constant_composite", 4024>;
def SPV_INTEL_optnone : I32EnumAttrCase<"SPV_INTEL_optnone", 4025>;
@@ -465,7 +465,7 @@ def SPIRV_ExtensionAttr :
SPV_INTEL_kernel_attributes, SPV_INTEL_fpga_memory_accesses,
SPV_INTEL_fpga_cluster_attributes, SPV_INTEL_loop_fuse,
SPV_INTEL_fpga_buffer_location, SPV_INTEL_arbitrary_precision_fixed_point,
- SPV_INTEL_usm_storage_classes, SPV_INTEL_io_pipes, SPV_INTEL_blocking_pipes,
+ SPV_INTEL_usm_storage_classes, SPV_INTEL_io_pipes, SPV_ALTERA_blocking_pipes,
SPV_INTEL_fpga_reg, SPV_INTEL_long_constant_composite, SPV_INTEL_optnone,
SPV_INTEL_debug_module, SPV_INTEL_fp_fast_math_mode,
SPV_INTEL_memory_access_aliasing, SPV_INTEL_split_barrier,
@@ -807,9 +807,9 @@ def SPIRV_C_IOPipesINTEL : I32EnumAttrCase<"IOPip
Extension<[SPV_INTEL_io_pipes]>
];
}
-def SPIRV_C_BlockingPipesINTEL : I32EnumAttrCase<"BlockingPipesINTEL", 5945> {
+def SPIRV_C_BlockingPipesALTERA : I32EnumAttrCase<"BlockingPipesALTERA", 5945> {
list<Availability> availability = [
- Extension<[SPV_INTEL_blocking_pipes]>
+ Extension<[SPV_ALTERA_blocking_pipes]>
];
}
def SPIRV_C_FPGARegINTEL : I32EnumAttrCase<"FPGARegINTEL", 5948> {
@@ -1519,7 +1519,7 @@ def SPIRV_CapabilityAttr :
SPIRV_C_FPGAMemoryAccessesINTEL, SPIRV_C_FPGAClusterAttributesINTEL,
SPIRV_C_LoopFuseINTEL, SPIRV_C_MemoryAccessAliasingINTEL,
SPIRV_C_FPGABufferLocationINTEL, SPIRV_C_ArbitraryPrecisionFixedPointINTEL,
- SPIRV_C_USMStorageClassesINTEL, SPIRV_C_IOPipesINTEL, SPIRV_C_BlockingPipesINTEL,
+ SPIRV_C_USMStorageClassesINTEL, SPIRV_C_IOPipesINTEL, SPIRV_C_BlockingPipesALTERA,
SPIRV_C_FPGARegINTEL, SPIRV_C_DotProductInputAll,
SPIRV_C_DotProductInput4x8BitPacked, SPIRV_C_DotProduct, SPIRV_C_RayCullMaskKHR,
SPIRV_C_CooperativeMatrixKHR, SPIRV_C_ReplicatedCompositesEXT,
diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td b/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td
index 2d9a26e..3e3fff4 100644
--- a/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td
+++ b/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td
@@ -103,4 +103,9 @@ def TransformAnyHandle : Type<
TransformValueHandleTypeInterface.predicate]>,
"transform operation or value handle">;
+def TransformAnyParamTypeOrAnyHandle : Type<
+ Or<[TransformHandleTypeInterface.predicate,
+ TransformParamTypeInterface.predicate]>,
+ "transform any param type or any handle type">;
+
#endif // MLIR_DIALECT_TRANSFORM_IR_TRANSFORMTYPES
diff --git a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
index 9f57627..cb1e9d0 100644
--- a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory(IR)
add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt
new file mode 100644
index 0000000..59246064
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_TARGET_DEFINITIONS XeGPUTransformOps.td)
+mlir_tablegen(XeGPUTransformOps.h.inc -gen-op-decls)
+mlir_tablegen(XeGPUTransformOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRXeGPUTransformOpsIncGen)
+
+add_mlir_doc(XeGPUTransformOps XeGPUTransformOps Dialects/ -gen-op-doc)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h
new file mode 100644
index 0000000..3e16d1e
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h
@@ -0,0 +1,28 @@
+//===- XeGPUTransformOps.h - XeGPU transformation ops -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_TRANSFORMOPS_XEGPUTRANSFORMOPS_H
+#define MLIR_DIALECT_XEGPU_TRANSFORMOPS_XEGPUTRANSFORMOPS_H
+
+#include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
+#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h.inc"
+
+namespace mlir {
+class DialectRegistry;
+
+namespace xegpu {
+void registerTransformDialectExtension(DialectRegistry &registry);
+} // namespace xegpu
+} // namespace mlir
+
+#endif // MLIR_DIALECT_XEGPU_TRANSFORMOPS_XEGPUTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
new file mode 100644
index 0000000..b985d54
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -0,0 +1,81 @@
+//===- XeGPUTransformOps.td - XeGPU transformation ops -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XEGPU_TRANSFORM_OPS
+#define XEGPU_TRANSFORM_OPS
+
+include "mlir/Dialect/Transform/IR/TransformAttrs.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
+ AttrSizedOperandSegments,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
+ TransformOpInterface
+]> {
+
+ let summary = "Set the xegpu.layout attribute on an xegpu.create_nd_desc op result.";
+ let description = [{
+ Given an `xegpu.create_nd_desc` operation, this transform adds an `xegpu.layout`
+ attribute to the result tensor descriptor. The layout is defined by the
+ `sg_layout` and `sg_data` attributes, plus an optional `inst_data` attribute.
+ Returns a handle to the transformed op.
+ }];
+
+ let arguments = (ins
+ TransformHandleTypeInterface : $target,
+ Variadic<TransformAnyParamTypeOrAnyHandle> : $sg_layout,
+ Variadic<TransformAnyParamTypeOrAnyHandle> : $sg_data,
+ Variadic<TransformAnyParamTypeOrAnyHandle> : $inst_data,
+ DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout,
+ DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
+ DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data
+ );
+
+ let results = (outs TransformHandleTypeInterface : $transformed);
+ let builders = [
+ OpBuilder<(ins "Value":$target,
+ "ArrayRef<OpFoldResult>":$mixedSgLayout,
+ "ArrayRef<OpFoldResult>":$mixedSgData,
+ "ArrayRef<OpFoldResult>":$mixedInstData
+ )>,
+ ];
+
+ let assemblyFormat = [{
+ $target
+ `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
+ `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
+ (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
+ attr-dict `:` functional-type(operands, results)
+ }];
+
+ let extraClassDeclaration = [{
+ ::mlir::DiagnosedSilenceableFailure apply(
+ ::mlir::transform::TransformRewriter &rewriter,
+ ::mlir::transform::TransformResults &transformResults,
+ ::mlir::transform::TransformState &state);
+
+ ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgLayout() {
+ Builder b(getContext());
+ return getMixedValues(getStaticSgLayout(), getSgLayout(), b);
+ }
+ ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgData() {
+ Builder b(getContext());
+ return getMixedValues(getStaticSgData(), getSgData(), b);
+ }
+ ::llvm::SmallVector<::mlir::OpFoldResult> getMixedInstData() {
+ Builder b(getContext());
+ return getMixedValues(getStaticInstData(), getInstData(), b);
+ }
+ }];
+}
+
+#endif // XEGPU_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Support/Timing.h b/mlir/include/mlir/Support/Timing.h
index 3d61a0a..50ae847 100644
--- a/mlir/include/mlir/Support/Timing.h
+++ b/mlir/include/mlir/Support/Timing.h
@@ -473,6 +473,11 @@ void registerDefaultTimingManagerCLOptions();
/// 'registerDefaultTimingManagerOptions' to a `DefaultTimingManager`.
void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm);
+/// Create an output strategy for the specified format, to be passed to
+/// DefaultTimingManager::setOutput().
+std::unique_ptr<OutputStrategy>
+createOutputStrategy(DefaultTimingManager::OutputFormat fmt, raw_ostream &os);
+
} // namespace mlir
#endif // MLIR_SUPPORT_TIMING_H
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index bd25e94..027268c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -232,10 +232,9 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
// 2. Compute the permutation vector to shuffle packed shape into the shape
// before any outer or inner permutations have been applied.
- PackingMetadata packingMetadata = computePackingMetadata(
- packedTensorType.getRank(), packOp.getInnerDimsPos());
+ PackingMetadata packingMetadata;
SmallVector<int64_t> packedToStripMinedShapePerm =
- getPackInverseDestPerm(packOp);
+ getPackInverseDestPerm(packOp, packingMetadata);
// 3. Compute the stripMinedShape: this is the packed shape before any outer
// or inner permutations have been applied.
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index cb6199f..19d2d85 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1564,13 +1564,6 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
return success();
}
-/// Given a linalg::PackOp, return the `dest` shape before any packing
-/// permutations.
-static SmallVector<int64_t> getTiledPackShape(linalg::PackOp packOp,
- ArrayRef<int64_t> destShape) {
- return applyPermutation(destShape, linalg::getPackInverseDestPerm(packOp));
-}
-
/// Determines whether a mask for xfer_write is trivially "all true"
///
/// Given all the inputs required to generate a mask (mask sizes and shapes),
@@ -1761,99 +1754,6 @@ createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vecToStore,
return mlir::vector::maskOperation(builder, write, maskForWrite);
}
-/// Vectorize linalg::PackOp with (1) static inner_tiles (2) constant
-/// padding value and (3) input vector sizes into:
-///
-/// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds
-///
-/// As in the following example:
-/// %pack = tensor.pack %src inner_dims_pos = [2, 1] inner_tiles = [16, 2]
-/// into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32>
-///
-/// This pack would be vectorized to:
-///
-/// %load = vector.mask %mask {
-/// vector.transfer_read %arg0[%c0, %c0, %c0], %cst
-/// {in_bounds = [true, true, true]} :
-/// tensor<32x7x16xf32>, vector<32x8x16xf32>
-/// } : vector<32x8x16xi1> -> vector<32x8x16xf32>
-/// %shape_cast = vector.shape_cast %load : vector<32x8x16xf32>
-/// to vector<32x4x2x1x16xf32>
-/// %transpose = vector.transpose %shape_cast, [0, 1, 3, 4, 2]
-/// : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
-/// %write = vector.transfer_write %transpose,
-/// %empty[%c0_0, %c0_0, %c0_0, %c0_0, %c0_0]
-/// {in_bounds = [true, true, true, true, true]}
-/// : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
-///
-/// If the (3) input vector sizes are not provided, the vector sizes are
-/// determined by the result tensor shape and the `in_bounds`
-/// attribute is used instead of masking to mark out-of-bounds accesses.
-///
-/// NOTE: The input vector sizes specify the dimensions corresponding to the
-/// outer dimensions of the output tensor. The remaining dimensions are
-/// computed based on, e.g., the static inner tiles.
-/// Supporting dynamic inner tiles will require the user to specify the
-/// missing vector sizes. This is left as a TODO.
-static LogicalResult
-vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
- ArrayRef<int64_t> inputVectorSizes,
- SmallVectorImpl<Value> &newResults) {
- // TODO: Introduce a parent class that will handle the insertion point update.
- OpBuilder::InsertionGuard g(rewriter);
- rewriter.setInsertionPoint(packOp);
-
- Location loc = packOp.getLoc();
- std::optional<Value> padValue = packOp.getPaddingValue()
- ? std::optional(packOp.getPaddingValue())
- : std::nullopt;
-
- // If the input vector sizes are not provided, then the vector sizes are
- // determined by the result tensor shape. In case the vector sizes aren't
- // provided, we update the inBounds attribute instead of masking.
- bool useInBoundsInsteadOfMasking = false;
- if (inputVectorSizes.empty()) {
- ArrayRef<int64_t> resultTensorShape = packOp.getDestType().getShape();
- inputVectorSizes = resultTensorShape.take_front(packOp.getSourceRank());
- useInBoundsInsteadOfMasking = true;
- }
-
- // Create masked TransferReadOp.
- SmallVector<int64_t> inputShape(inputVectorSizes);
- auto innerTiles = packOp.getStaticInnerTiles();
- auto innerDimsPos = packOp.getInnerDimsPos();
- auto outerDimsPerm = packOp.getOuterDimsPerm();
- if (!outerDimsPerm.empty())
- applyPermutationToVector(inputShape,
- invertPermutationVector(outerDimsPerm));
- for (auto [idx, size] : enumerate(innerTiles))
- inputShape[innerDimsPos[idx]] *= size;
- auto maskedRead = vector::createReadOrMaskedRead(
- rewriter, loc, packOp.getSource(), inputShape, padValue,
- useInBoundsInsteadOfMasking,
- /*inputScalableVecSizes=*/{});
-
- // Create ShapeCastOp.
- SmallVector<int64_t> destShape(inputVectorSizes);
- destShape.append(innerTiles.begin(), innerTiles.end());
- auto tiledPackType = VectorType::get(getTiledPackShape(packOp, destShape),
- packOp.getDestType().getElementType());
- auto shapeCastOp =
- vector::ShapeCastOp::create(rewriter, loc, tiledPackType, maskedRead);
-
- // Create TransposeOp.
- auto destPermutation =
- invertPermutationVector(getPackInverseDestPerm(packOp));
- auto transposeOp = vector::TransposeOp::create(
- rewriter, loc, shapeCastOp.getResult(), destPermutation);
-
- // Create TransferWriteOp.
- Operation *write = createWriteOrMaskedWrite(
- rewriter, loc, transposeOp.getResult(), packOp.getDest());
- newResults.push_back(write->getResult(0));
- return success();
-}
-
/// Given the re-associations, "collapses" the input Vector type
///
/// This is similar to CollapseShapeOp::inferCollapsedType with two notable
@@ -1901,12 +1801,121 @@ static VectorType getCollapsedVecType(VectorType type,
return VectorType::get(newShape, type.getElementType(), newScalableFlags);
}
+/// Vectorize `linalg.pack` as:
+/// * xfer_read -> shape_cast -> transpose -> xfer_write
+///
+/// The input-vector-sizes specify the _write_ vector sizes (i.e. the vector
+/// sizes for the xfer_write operation). This is sufficient to infer the other
+/// vector sizes required here.
+///
+/// If the vector sizes are not provided:
+/// * the vector sizes are determined from the destination tensor static shape.
+/// * the inBounds attribute is used instead of masking.
+///
+/// EXAMPLE (no vector sizes):
+/// ```
+/// %pack = tensor.pack %src
+/// inner_dims_pos = [2, 1]
+/// inner_tiles = [16, 2]
+/// into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32>
+/// ```
+/// is vectorized as:
+/// ```
+/// %read = vector.transfer_read %src
+/// : tensor<32x7x16xf32>, vector<32x8x16xf32>
+/// %sc = vector.shape_cast %read
+/// : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
+/// %tr = vector.transpose %sc, [0, 1, 3, 4, 2]
+/// : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
+/// %write = vector.transfer_write %tr into %dest
+/// : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+/// ```
+static LogicalResult
+vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
+ ArrayRef<int64_t> inputVectorSizes,
+ SmallVectorImpl<Value> &newResults) {
+ if (!inputVectorSizes.empty()) {
+ assert(inputVectorSizes.size() == packOp.getDestRank() &&
+ "Invalid number of input vector sizes!");
+ }
+
+ // TODO: Introduce a parent class that will handle the insertion point update.
+ OpBuilder::InsertionGuard g(rewriter);
+ rewriter.setInsertionPoint(packOp);
+
+ Location loc = packOp.getLoc();
+ std::optional<Value> padValue = packOp.getPaddingValue()
+ ? std::optional(packOp.getPaddingValue())
+ : std::nullopt;
+
+ SmallVector<int64_t> destShape =
+ SmallVector<int64_t>(packOp.getDestType().getShape());
+
+ // This is just a convenience alias to clearly communicate that the input
+ // vector sizes determine the _write_ sizes.
+ ArrayRef<int64_t> &writeVectorSizes = inputVectorSizes;
+
+ // In the absence of input-vector-sizes, use the _static_ destination tensor
+ // shape. In addition, use the inBounds attribute instead of masking.
+ bool useInBoundsInsteadOfMasking = false;
+ if (writeVectorSizes.empty()) {
+ if (ShapedType::isDynamicShape(destShape))
+ return rewriter.notifyMatchFailure(packOp,
+ "unable to infer vector sizes");
+
+ writeVectorSizes = destShape;
+ useInBoundsInsteadOfMasking = true;
+ }
+
+ // Compute pre-transpose-write-vector-type, i.e. the write vector type
+ // _before_ the transposition (i.e. before dimension permutation). This is
+ // done by inverting the permutation/transposition that's part of the Pack
+ // operation. This type is required to:
+ // 1) compute the read vector type for masked-read below, and
+ // 2) generate shape-cast Op below that expands the read vector type.
+ PackingMetadata packMetadata;
+ SmallVector<int64_t> preTransposeWriteVecSizes(writeVectorSizes);
+ auto destInvPermutation = getPackInverseDestPerm(packOp, packMetadata);
+ applyPermutationToVector(preTransposeWriteVecSizes, destInvPermutation);
+ auto preTransposeWriteVecType = VectorType::get(
+ preTransposeWriteVecSizes, packOp.getType().getElementType());
+
+ // Compute the vector type for the _read_ operation. This is simply the
+ // pre-transpose-write-vector-type with the dimensions collapsed
+ // as per the Pack operation.
+ VectorType readVecType = getCollapsedVecType(
+ preTransposeWriteVecType,
+ getSymbolLessAffineMaps(convertReassociationIndicesToExprs(
+ rewriter.getContext(), packMetadata.reassociations)));
+
+ // Create masked TransferReadOp.
+ auto maskedRead = vector::createReadOrMaskedRead(
+ rewriter, loc, packOp.getSource(), readVecType.getShape(), padValue,
+ useInBoundsInsteadOfMasking,
+ /*inputScalableVecSizes=*/{});
+
+ // Create ShapeCastOp.
+ auto shapeCastOp = vector::ShapeCastOp::create(
+ rewriter, loc, preTransposeWriteVecType, maskedRead);
+
+ // Create TransposeOp.
+ auto destPermutation = invertPermutationVector(destInvPermutation);
+ auto transposeOp = vector::TransposeOp::create(
+ rewriter, loc, shapeCastOp.getResult(), destPermutation);
+
+ // Create TransferWriteOp.
+ Operation *write = createWriteOrMaskedWrite(
+ rewriter, loc, transposeOp.getResult(), packOp.getDest());
+ newResults.push_back(write->getResult(0));
+ return success();
+}
+
/// Vectorize `linalg.unpack` as:
/// * xfer_read -> vector.transpose -> vector.shape_cast -> xfer_write
///
-/// The input-vector-sizes specify the read vector sizes (i.e. the vector sizes
-/// for the xfer_read operation). This is sufficient to infer the other vector
-/// sizes required here.
+/// The input-vector-sizes specify the _read_ vector sizes (i.e. the vector
+/// sizes for the xfer_read operation). This is sufficient to infer the other
+/// vector sizes required here.
///
/// If the vector sizes are not provided:
/// * the vector sizes are determined from the input tensor static shape.
@@ -1960,7 +1969,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
// In the absence of input-vector-sizes, use the _static_ input tensor shape.
if (inputVectorSizes.empty()) {
if (ShapedType::isDynamicShape(sourceShape))
- return failure();
+ return rewriter.notifyMatchFailure(unpackOp,
+ "Unable to infer vector sizes!");
readVectorSizes.assign(sourceShape.begin(), sourceShape.end());
useInBoundsInsteadOfMasking = true;
@@ -2443,6 +2453,7 @@ vectorizePackOpPrecondition(linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes) {
auto padValue = packOp.getPaddingValue();
Attribute cstAttr;
+ // TODO: Relax this condition.
if (padValue && !matchPattern(padValue, m_Constant(&cstAttr))) {
LDBG() << "pad value is not constant: " << packOp;
return failure();
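To make the size inference in vectorizeAsTensorPackOp concrete, here is a small Python sketch of the two steps (inverse-permute the write vector sizes, then collapse each reassociation group), using the shapes from the doc comment above; the function names are illustrative, not the MLIR API:

def apply_permutation(shape, perm):
    # result[i] = shape[perm[i]], as in applyPermutationToVector.
    return [shape[p] for p in perm]

def collapse(shape, groups):
    # Multiply out each reassociation group, as in getCollapsedVecType.
    out = []
    for group in groups:
        size = 1
        for d in group:
            size *= shape[d]
        out.append(size)
    return out

write_sizes = [32, 4, 1, 16, 2]   # vector<32x4x1x16x2xf32>, the xfer_write type
inv_perm = [0, 1, 4, 2, 3]        # inverse of the [0, 1, 3, 4, 2] transpose
pre_transpose = apply_permutation(write_sizes, inv_perm)
assert pre_transpose == [32, 4, 2, 1, 16]
# Reassociations [[0], [1, 2], [3, 4]] collapse to the xfer_read type.
assert collapse(pre_transpose, [[0], [1, 2], [3, 4]]) == [32, 8, 16]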
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 24d3722..6eeb206 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -171,29 +171,24 @@ computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
namespace mlir {
namespace linalg {
-SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp) {
+SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp,
+ PackingMetadata &metadata) {
- PackingMetadata pMetadata;
int64_t packedRank = packOp.getDestType().getRank();
ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
SmallVector<int64_t> packInvDestPerm =
- computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata);
+ computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
return packInvDestPerm;
}
-SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp) {
- PackingMetadata metadata;
- return getUnPackInverseSrcPerm(unpackOp, metadata);
-}
-
SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp,
PackingMetadata &metadata) {
- int64_t unpackRank = unpackOp.getSourceType().getRank();
+ int64_t packedRank = unpackOp.getSourceType().getRank();
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
SmallVector<int64_t> unpackInvSrcPerm =
- computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata);
+ computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
return unpackInvSrcPerm;
}
diff --git a/mlir/lib/Dialect/XeGPU/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/CMakeLists.txt
index 31167e6..46b8251 100644
--- a/mlir/lib/Dialect/XeGPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/CMakeLists.txt
@@ -1,3 +1,4 @@
add_subdirectory(IR)
add_subdirectory(Transforms)
add_subdirectory(Utils)
+add_subdirectory(TransformOps)
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt
new file mode 100644
index 0000000..48fe841
--- /dev/null
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_mlir_dialect_library(MLIRXeGPUTransformOps
+ XeGPUTransformOps.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${PROJECT_SOURCE_DIR}/mlir/Dialect/XeGPU/TransformOps/
+
+ DEPENDS
+ MLIRXeGPUTransformOpsIncGen
+
+ LINK_LIBS PUBLIC
+ MLIRXeGPUDialect
+ MLIRXeGPUTransforms
+ MLIRIR
+ MLIRTransformDialect
+ MLIRFuncDialect
+ MLIRSCFDialect
+)
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
new file mode 100644
index 0000000..8943ba0
--- /dev/null
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -0,0 +1,225 @@
+//===- XeGPUTransformOps.cpp - Implementation of XeGPU transformation ops -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
+#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
+
+#include <optional>
+
+using namespace mlir;
+using namespace mlir::transform;
+
+/// Assuming that each `ofr` is an index attribute, a param of index type, or a
+/// transform dialect handle mapped to exactly one op with a single index
+/// result, get that value and cast it to an integer.
+static DiagnosedSilenceableFailure convertMixedValuesToInt(
+ transform::TransformState &state, TransformOpInterface transformOp,
+ SmallVectorImpl<int32_t> &result, ArrayRef<OpFoldResult> ofrs) {
+ for (OpFoldResult ofr : ofrs) {
+ // Attribute case.
+ if (auto attr = dyn_cast<Attribute>(ofr)) {
+ if (auto intAttr = dyn_cast<IntegerAttr>(attr)) {
+ result.push_back(intAttr.getInt());
+ continue;
+ }
+ return transformOp.emitDefiniteFailure() << "expected IntegerAttr";
+ }
+
+ // Transform param case.
+ Value transformValue = cast<Value>(ofr);
+ if (isa<TransformParamTypeInterface>(transformValue.getType())) {
+ ArrayRef<Attribute> params = state.getParams(transformValue);
+ if (params.size() != 1)
+ return transformOp.emitDefiniteFailure()
+ << "requires exactly one associated parameter";
+ result.push_back(
+ cast<IntegerAttr>(params.front()).getValue().getSExtValue());
+ continue;
+ }
+
+ // Payload value case.
+ auto payloadOps = state.getPayloadOps(transformValue);
+ if (!llvm::hasSingleElement(payloadOps)) {
+ DiagnosedSilenceableFailure diag =
+ transformOp.emitSilenceableError()
+ << "handle must be mapped to exactly one payload op";
+ diag.attachNote(transformValue.getLoc())
+ << "mapped to " << llvm::range_size(payloadOps) << " payload ops";
+ return diag;
+ }
+
+ Operation *op = *payloadOps.begin();
+ if (op->getNumResults() != 1 || !op->getResult(0).getType().isIndex()) {
+ DiagnosedSilenceableFailure diag =
+ transformOp.emitSilenceableError()
+ << "payload op must have exactly 1 index result";
+ diag.attachNote(op->getLoc())
+ << "has " << op->getNumResults() << " results";
+ return diag;
+ }
+
+ IntegerAttr intAttr;
+ if (!matchPattern(op->getResult(0), m_Constant(&intAttr)))
+ return transformOp.emitSilenceableError()
+ << "requires the param or handle to be the result of a constant-like op";
+
+ result.push_back(intAttr.getInt());
+ }
+ return DiagnosedSilenceableFailure::success();
+}
+
+/// Create a layout attribute from the given parameters.
+static xegpu::LayoutAttr
+createLayoutAttr(MLIRContext *ctx, ArrayRef<int32_t> sgLayout,
+ ArrayRef<int32_t> sgData,
+ std::optional<ArrayRef<int32_t>> instData) {
+ return xegpu::LayoutAttr::get(
+ ctx, DenseI32ArrayAttr::get(ctx, sgLayout),
+ DenseI32ArrayAttr::get(ctx, sgData),
+ instData ? DenseI32ArrayAttr::get(ctx, instData.value()) : nullptr,
+ /*lane_layout=*/nullptr,
+ /*lane_data=*/nullptr,
+ /*order=*/nullptr);
+}
+
+/// Replace the given xegpu.create_nd_desc op with an identical one whose
+/// tensor descriptor type carries the given layout.
+static xegpu::CreateNdDescOp
+setDescLayout(transform::TransformRewriter &rewriter,
+ xegpu::CreateNdDescOp descOp, xegpu::LayoutAttr layout) {
+ assert(descOp.getMixedOffsets().empty() &&
+ "create desc op with offsets is not supported");
+ auto oldTensorDesc = descOp.getType();
+ auto descType = xegpu::TensorDescType::get(
+ oldTensorDesc.getShape(), oldTensorDesc.getElementType(),
+ /*array_length=*/oldTensorDesc.getArrayLength(),
+ /*boundary_check=*/oldTensorDesc.getBoundaryCheck(),
+ /*memory_space=*/oldTensorDesc.getMemorySpace(),
+ /*layout=*/layout);
+
+ rewriter.setInsertionPointAfter(descOp);
+ auto newDescOp = rewriter.replaceOpWithNewOp<xegpu::CreateNdDescOp>(
+ descOp, descType, descOp.getSource(), descOp.getMixedSizes(),
+ descOp.getMixedStrides());
+ return newDescOp;
+}
+
+void transform::SetDescLayoutOp::build(OpBuilder &builder,
+ OperationState &result, Value target,
+ ArrayRef<OpFoldResult> mixedSgLayout,
+ ArrayRef<OpFoldResult> mixedSgData,
+ ArrayRef<OpFoldResult> mixedInstData) {
+ SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
+ SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
+ dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
+ dispatchIndexOpFoldResults(mixedSgData, dynamicSgData, staticSgData);
+ dispatchIndexOpFoldResults(mixedInstData, dynamicInstData, staticInstData);
+ build(builder, result, target.getType(),
+ /*target=*/target,
+ /*sg_layout=*/dynamicSgLayout,
+ /*sg_data=*/dynamicSgData,
+ /*inst_data=*/dynamicInstData,
+ /*static_sg_layout=*/staticSgLayout,
+ /*static_sg_data=*/staticSgData,
+ /*static_inst_data=*/staticInstData);
+}
+
+DiagnosedSilenceableFailure
+transform::SetDescLayoutOp::apply(transform::TransformRewriter &rewriter,
+ transform::TransformResults &results,
+ transform::TransformState &state) {
+ auto targetOps = state.getPayloadOps(getTarget());
+ if (!llvm::hasSingleElement(targetOps)) {
+ return emitDefiniteFailure() << "requires exactly one targetOp handle (got "
+ << llvm::range_size(targetOps) << ")";
+ }
+ Operation *target = *targetOps.begin();
+
+ SmallVector<int32_t> sgLayout;
+ DiagnosedSilenceableFailure status =
+ convertMixedValuesToInt(state, (*this), sgLayout, getMixedSgLayout());
+ if (!status.succeeded())
+ return status;
+
+ SmallVector<int32_t> sgData;
+ status = convertMixedValuesToInt(state, (*this), sgData, getMixedSgData());
+ if (!status.succeeded())
+ return status;
+
+ SmallVector<int32_t> instData;
+ status =
+ convertMixedValuesToInt(state, (*this), instData, getMixedInstData());
+ if (!status.succeeded())
+ return status;
+ auto maybeInstData = instData.empty()
+ ? std::nullopt
+ : std::optional<ArrayRef<int32_t>>(instData);
+
+ // For now, only the xegpu.create_nd_desc op is supported.
+ auto descOp = dyn_cast<xegpu::CreateNdDescOp>(target);
+ if (!descOp) {
+ auto diag = emitSilenceableFailure(getLoc())
+ << "expected a xegpu.create_nd_desc op, but got: "
+ << target->getName();
+ diag.attachNote(target->getLoc()) << "target op";
+ return diag;
+ }
+
+ // Set the layout attr on the desc op's result type; this replaces the old
+ // desc op.
+ auto layoutAttr =
+ createLayoutAttr(rewriter.getContext(), sgLayout, sgData, maybeInstData);
+ auto newDescOp = setDescLayout(rewriter, descOp, layoutAttr);
+
+ // Map result handles.
+ results.set(cast<OpResult>(getTransformed()), {newDescOp.getOperation()});
+
+ return DiagnosedSilenceableFailure::success();
+}
+
+void transform::SetDescLayoutOp::getEffects(
+ ::llvm::SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
+ consumesHandle(getTargetMutable(), effects);
+ onlyReadsHandle(getSgLayoutMutable(), effects);
+ onlyReadsHandle(getSgDataMutable(), effects);
+ onlyReadsHandle(getInstDataMutable(), effects);
+ producesHandle(getOperation()->getOpResults(), effects);
+ modifiesPayload(effects);
+}
+
+namespace {
+class XeGPUTransformDialectExtension
+ : public transform::TransformDialectExtension<
+ XeGPUTransformDialectExtension> {
+public:
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(XeGPUTransformDialectExtension)
+
+ using Base::Base;
+
+ void init();
+};
+
+void XeGPUTransformDialectExtension::init() {
+ declareGeneratedDialect<scf::SCFDialect>();
+ declareGeneratedDialect<arith::ArithDialect>();
+ declareGeneratedDialect<xegpu::XeGPUDialect>();
+
+ registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp.inc"
+ >();
+}
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp.inc"
+
+void mlir::xegpu::registerTransformDialectExtension(DialectRegistry &registry) {
+ registry.addExtensions<XeGPUTransformDialectExtension>();
+}
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index d2bafb7..a5bfde1 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -16,6 +16,7 @@
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
#define DEBUG_TYPE "value-bounds-op-interface"
@@ -195,7 +196,7 @@ void ValueBoundsConstraintSet::addBound(BoundType type, int64_t pos,
// Even without this bound, there may be enough information in the
// constraint system to compute the requested bound. In case this bound is
// actually needed, `computeBound` will return `failure`.
- LLVM_DEBUG(llvm::dbgs() << "Failed to add bound: " << expr << "\n");
+ LDBG() << "Failed to add bound: " << expr << "\n";
}
}
@@ -271,11 +272,9 @@ int64_t ValueBoundsConstraintSet::insert(Value value,
assert(!valueDimToPosition.contains(valueDim) && "already mapped");
int64_t pos = isSymbol ? cstr.appendVar(VarKind::Symbol)
: cstr.appendVar(VarKind::SetDim);
- LLVM_DEBUG(llvm::dbgs() << "Inserting constraint set column " << pos
- << " for: " << value
- << " (dim: " << dim.value_or(kIndexValue)
- << ", owner: " << getOwnerOfValue(value)->getName()
- << ")\n");
+ LDBG() << "Inserting constraint set column " << pos << " for: " << value
+ << " (dim: " << dim.value_or(kIndexValue)
+ << ", owner: " << getOwnerOfValue(value)->getName() << ")";
positionToValueDim.insert(positionToValueDim.begin() + pos, valueDim);
// Update reverse mapping.
for (int64_t i = pos, e = positionToValueDim.size(); i < e; ++i)
@@ -283,8 +282,8 @@ int64_t ValueBoundsConstraintSet::insert(Value value,
valueDimToPosition[*positionToValueDim[i]] = i;
if (addToWorklist) {
- LLVM_DEBUG(llvm::dbgs() << "Push to worklist: " << value
- << " (dim: " << dim.value_or(kIndexValue) << ")\n");
+ LDBG() << "Push to worklist: " << value
+ << " (dim: " << dim.value_or(kIndexValue) << ")";
worklist.push(pos);
}
@@ -294,8 +293,7 @@ int64_t ValueBoundsConstraintSet::insert(Value value,
int64_t ValueBoundsConstraintSet::insert(bool isSymbol) {
int64_t pos = isSymbol ? cstr.appendVar(VarKind::Symbol)
: cstr.appendVar(VarKind::SetDim);
- LLVM_DEBUG(llvm::dbgs() << "Inserting anonymous constraint set column " << pos
- << "\n");
+ LDBG() << "Inserting anonymous constraint set column " << pos;
positionToValueDim.insert(positionToValueDim.begin() + pos, std::nullopt);
// Update reverse mapping.
for (int64_t i = pos, e = positionToValueDim.size(); i < e; ++i)
@@ -339,10 +337,9 @@ int64_t ValueBoundsConstraintSet::getPos(Value value,
cast<BlockArgument>(value).getOwner()->isEntryBlock()) &&
"unstructured control flow is not supported");
#endif // NDEBUG
- LLVM_DEBUG(llvm::dbgs() << "Getting pos for: " << value
- << " (dim: " << dim.value_or(kIndexValue)
- << ", owner: " << getOwnerOfValue(value)->getName()
- << ")\n");
+ LDBG() << "Getting pos for: " << value
+ << " (dim: " << dim.value_or(kIndexValue)
+ << ", owner: " << getOwnerOfValue(value)->getName() << ")";
auto it =
valueDimToPosition.find(std::make_pair(value, dim.value_or(kIndexValue)));
assert(it != valueDimToPosition.end() && "expected mapped entry");
@@ -364,7 +361,7 @@ bool ValueBoundsConstraintSet::isMapped(Value value,
}
void ValueBoundsConstraintSet::processWorklist() {
- LLVM_DEBUG(llvm::dbgs() << "Processing value bounds worklist...\n");
+ LDBG() << "Processing value bounds worklist...";
while (!worklist.empty()) {
int64_t pos = worklist.front();
worklist.pop();
@@ -386,8 +383,8 @@ void ValueBoundsConstraintSet::processWorklist() {
// Do not process any further if the stop condition is met.
auto maybeDim = dim == kIndexValue ? std::nullopt : std::make_optional(dim);
if (stopCondition(value, maybeDim, *this)) {
- LLVM_DEBUG(llvm::dbgs() << "Stop condition met for: " << value
- << " (dim: " << maybeDim << ")\n");
+ LDBG() << "Stop condition met for: " << value << " (dim: " << maybeDim
+ << ")";
continue;
}
@@ -395,9 +392,8 @@ void ValueBoundsConstraintSet::processWorklist() {
// the worklist.
auto valueBoundsOp =
dyn_cast<ValueBoundsOpInterface>(getOwnerOfValue(value));
- LLVM_DEBUG(llvm::dbgs()
- << "Query value bounds for: " << value
- << " (owner: " << getOwnerOfValue(value)->getName() << ")\n");
+ LDBG() << "Query value bounds for: " << value
+ << " (owner: " << getOwnerOfValue(value)->getName() << ")";
if (valueBoundsOp) {
if (dim == kIndexValue) {
valueBoundsOp.populateBoundsForIndexValue(value, *this);
@@ -406,7 +402,7 @@ void ValueBoundsConstraintSet::processWorklist() {
}
continue;
}
- LLVM_DEBUG(llvm::dbgs() << "--> ValueBoundsOpInterface not implemented\n");
+ LDBG() << "--> ValueBoundsOpInterface not implemented";
// If the op does not implement `ValueBoundsOpInterface`, check if it
// implements the `DestinationStyleOpInterface`. OpResults of such ops are
@@ -705,9 +701,7 @@ bool ValueBoundsConstraintSet::comparePos(int64_t lhsPos,
// We cannot prove anything if the constraint set is already empty.
if (cstr.isEmpty()) {
- LLVM_DEBUG(
- llvm::dbgs()
- << "cannot compare value/dims: constraint system is already empty");
+ LDBG() << "cannot compare value/dims: constraint system is already empty";
return false;
}
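
The pattern applied throughout this file, as a before/after sketch: LDBG() is line-oriented and terminates the line itself, so the explicit trailing "\n" from the LLVM_DEBUG form is dropped:

    // Before:
    LLVM_DEBUG(llvm::dbgs() << "Processing value bounds worklist...\n");
    // After: DEBUG_TYPE filtering and the trailing newline come from LDBG().
    LDBG() << "Processing value bounds worklist...";
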
diff --git a/mlir/lib/RegisterAllExtensions.cpp b/mlir/lib/RegisterAllExtensions.cpp
index 3839172..c857c38 100644
--- a/mlir/lib/RegisterAllExtensions.cpp
+++ b/mlir/lib/RegisterAllExtensions.cpp
@@ -56,6 +56,7 @@
#include "mlir/Dialect/Transform/SMTExtension/SMTExtension.h"
#include "mlir/Dialect/Transform/TuneExtension/TuneExtension.h"
#include "mlir/Dialect/Vector/TransformOps/VectorTransformOps.h"
+#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h"
#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -112,6 +113,7 @@ void mlir::registerAllExtensions(DialectRegistry &registry) {
transform::registerSMTExtension(registry);
transform::registerTuneExtension(registry);
vector::registerTransformDialectExtension(registry);
+ xegpu::registerTransformDialectExtension(registry);
arm_neon::registerTransformDialectExtension(registry);
arm_sve::registerTransformDialectExtension(registry);
diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp
index 2e92d9c..b0ac379 100644
--- a/mlir/lib/Support/Timing.cpp
+++ b/mlir/lib/Support/Timing.cpp
@@ -619,11 +619,17 @@ void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) {
return;
tm.setEnabled(options->timing);
tm.setDisplayMode(options->displayMode);
+ tm.setOutput(createOutputStrategy(options->outputFormat, llvm::errs()));
+}
- std::unique_ptr<OutputStrategy> printer;
- if (options->outputFormat == OutputFormat::Text)
- printer = std::make_unique<OutputTextStrategy>(llvm::errs());
- else if (options->outputFormat == OutputFormat::Json)
- printer = std::make_unique<OutputJsonStrategy>(llvm::errs());
- tm.setOutput(std::move(printer));
+std::unique_ptr<OutputStrategy>
+mlir::createOutputStrategy(DefaultTimingManager::OutputFormat fmt,
+ raw_ostream &os) {
+ switch (fmt) {
+ case OutputFormat::Text:
+ return std::make_unique<OutputTextStrategy>(os);
+ case OutputFormat::Json:
+ return std::make_unique<OutputJsonStrategy>(os);
+ }
+ llvm_unreachable("Invalid output format");
}
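
A short usage sketch for the extracted factory (the manager setup is illustrative): the strategy writes to the stream it is constructed with, so the stream must outlive the timing output:

    // Emit timing results as JSON on stderr, as the CL-options path now does.
    mlir::DefaultTimingManager tm;
    tm.setEnabled(true);
    tm.setOutput(mlir::createOutputStrategy(
        mlir::DefaultTimingManager::OutputFormat::Json, llvm::errs()));
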
diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp
index 979b396..41f3f9d 100644
--- a/mlir/lib/Transforms/RemoveDeadValues.cpp
+++ b/mlir/lib/Transforms/RemoveDeadValues.cpp
@@ -742,25 +742,7 @@ static void processBranchOp(BranchOpInterface branchOp, RunLivenessAnalysis &la,
static void cleanUpDeadVals(RDVFinalCleanupList &list) {
LDBG() << "Starting cleanup of dead values...";
- // 1. Blocks
- LDBG() << "Cleaning up " << list.blocks.size() << " block argument lists";
- for (auto &b : list.blocks) {
- // blocks that are accessed via multiple codepaths processed once
- if (b.b->getNumArguments() != b.nonLiveArgs.size())
- continue;
- LDBG() << "Erasing " << b.nonLiveArgs.count()
- << " non-live arguments from block: " << b.b;
- // it iterates backwards because erase invalidates all successor indexes
- for (int i = b.nonLiveArgs.size() - 1; i >= 0; --i) {
- if (!b.nonLiveArgs[i])
- continue;
- LDBG() << " Erasing block argument " << i << ": " << b.b->getArgument(i);
- b.b->getArgument(i).dropAllUses();
- b.b->eraseArgument(i);
- }
- }
-
- // 2. Operations
+ // 1. Operations
LDBG() << "Cleaning up " << list.operations.size() << " operations";
for (auto &op : list.operations) {
LDBG() << "Erasing operation: "
@@ -769,14 +751,14 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) {
op->erase();
}
- // 3. Values
+ // 2. Values
LDBG() << "Cleaning up " << list.values.size() << " values";
for (auto &v : list.values) {
LDBG() << "Dropping all uses of value: " << v;
v.dropAllUses();
}
- // 4. Functions
+ // 3. Functions
LDBG() << "Cleaning up " << list.functions.size() << " functions";
// Record which function arguments were erased so we can shrink call-site
// argument segments for CallOpInterface operations (e.g. ops using
@@ -798,7 +780,7 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) {
(void)f.funcOp.eraseResults(f.nonLiveRets);
}
- // 5. Operands
+ // 4. Operands
LDBG() << "Cleaning up " << list.operands.size() << " operand lists";
for (OperationToCleanup &o : list.operands) {
// Handle call-specific cleanup only when we have a cached callee reference.
@@ -840,7 +822,7 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) {
}
}
- // 6. Results
+ // 5. Results
LDBG() << "Cleaning up " << list.results.size() << " result lists";
for (auto &r : list.results) {
LDBG() << "Erasing " << r.nonLive.count()
@@ -849,6 +831,24 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) {
dropUsesAndEraseResults(r.op, r.nonLive);
}
+ // 6. Blocks
+ LDBG() << "Cleaning up " << list.blocks.size() << " block argument lists";
+ for (auto &b : list.blocks) {
+ // Blocks reached via multiple code paths are processed only once.
+ if (b.b->getNumArguments() != b.nonLiveArgs.size())
+ continue;
+ LDBG() << "Erasing " << b.nonLiveArgs.count()
+ << " non-live arguments from block: " << b.b;
+ // Iterate backwards because erasing invalidates subsequent argument indices.
+ for (int i = b.nonLiveArgs.size() - 1; i >= 0; --i) {
+ if (!b.nonLiveArgs[i])
+ continue;
+ LDBG() << " Erasing block argument " << i << ": " << b.b->getArgument(i);
+ b.b->getArgument(i).dropAllUses();
+ b.b->eraseArgument(i);
+ }
+ }
+
// 7. Successor Operands
LDBG() << "Cleaning up " << list.successorOperands.size()
<< " successor operand lists";
diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt
index 20ed3ab..51c7576 100644
--- a/mlir/python/CMakeLists.txt
+++ b/mlir/python/CMakeLists.txt
@@ -322,6 +322,15 @@ declare_mlir_dialect_extension_python_bindings(
"../../include/mlir/Dialect/Vector/Transforms/VectorTransformsBase.td"
)
+declare_mlir_dialect_extension_python_bindings(
+ ADD_TO_PARENT MLIRPythonSources.Dialects
+ ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
+ TD_FILE dialects/XeGPUTransformOps.td
+ SOURCES
+ dialects/transform/xegpu.py
+ DIALECT_NAME transform
+ EXTENSION_NAME xegpu_transform)
+
declare_mlir_dialect_python_bindings(
ADD_TO_PARENT MLIRPythonSources.Dialects
ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
diff --git a/mlir/python/mlir/dialects/XeGPUTransformOps.td b/mlir/python/mlir/dialects/XeGPUTransformOps.td
new file mode 100644
index 0000000..5a5e7b9
--- /dev/null
+++ b/mlir/python/mlir/dialects/XeGPUTransformOps.td
@@ -0,0 +1,19 @@
+//===---- XeGPUTransformOps.td -----------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Entry point of the Python bindings generator for the XeGPU transform ops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PYTHON_BINDINGS_XEGPU_TRANSFORM_OPS
+#define PYTHON_BINDINGS_XEGPU_TRANSFORM_OPS
+
+include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td"
+
+#endif // PYTHON_BINDINGS_XEGPU_TRANSFORM_OPS
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
new file mode 100644
index 0000000..2918bf5
--- /dev/null
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -0,0 +1,66 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from .._xegpu_transform_ops_gen import *
+from .._xegpu_transform_ops_gen import _Dialect
+
+try:
+ from ...ir import *
+ from .._ods_common import _cext as _ods_cext
+ from .._ods_common import (
+ MixedValues,
+ get_op_result_or_value as _get_op_result_or_value,
+ _dispatch_dynamic_index_list,
+ )
+
+except ImportError as e:
+ raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Union, Optional
+
+
+@_ods_cext.register_operation(_Dialect, replace=True)
+class SetDescLayoutOp(SetDescLayoutOp):
+ """Specialization for SetDescLayoutOp class."""
+
+ def __init__(
+ self,
+ target: Union[Operation, Value],
+ sg_layout: MixedValues,
+ sg_data: MixedValues,
+ *,
+ inst_data: Optional[MixedValues] = None,
+ loc=None,
+ ip=None,
+ ):
+ target_handle = _get_op_result_or_value(target)
+ inst_data = [] if inst_data is None else inst_data
+ (
+ dynamic_sg_layout,
+ static_sg_layout,
+ _,
+ ) = _dispatch_dynamic_index_list(sg_layout)
+ (
+ dynamic_sg_data,
+ static_sg_data,
+ _,
+ ) = _dispatch_dynamic_index_list(sg_data)
+ (
+ dynamic_inst_data,
+ static_inst_data,
+ _,
+ ) = _dispatch_dynamic_index_list(inst_data)
+
+ super().__init__(
+ target_handle.type,
+ target_handle,
+ dynamic_sg_layout,
+ dynamic_sg_data,
+ dynamic_inst_data,
+ static_sg_layout=static_sg_layout,
+ static_sg_data=static_sg_data,
+ static_inst_data=static_inst_data,
+ loc=loc,
+ ip=ip,
+ )
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
index aa2c1da..9a14ab7 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
@@ -285,6 +285,8 @@ module attributes {transform.with_named_sequence} {
///----------------------------------------------------------------------------------------
/// Tests for linalg.pack
+///
+/// TODO: Add similar tests for linalg.unpack
///----------------------------------------------------------------------------------------
// Note, see a similar test in:
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
index 1304a90..170bae6 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
@@ -1335,7 +1335,7 @@ func.func @pack_no_padding(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x16x2x
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%src: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.pack"]} in %src : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [4, 1, 32, 16, 2] : !transform.any_op
transform.yield
}
}
@@ -1378,7 +1378,7 @@ func.func @pack_with_padding(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [32, 4, 1] : !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [32, 4, 1, 16, 2] : !transform.any_op
transform.yield
}
}
@@ -1424,8 +1424,13 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func @pack_with_dynamic_dims
// CHECK-SAME: %[[SRC:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x16x2xf32>
-func.func @pack_with_dynamic_dims(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
- %pack = linalg.pack %src inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
+func.func @pack_with_dynamic_dims(
+ %src: tensor<?x?xf32>,
+ %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
+ %pack = linalg.pack %src
+ inner_dims_pos = [1, 0]
+ inner_tiles = [16, 2]
+ into %dest : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
return %pack : tensor<?x?x16x2xf32>
}
@@ -1433,30 +1438,108 @@ func.func @pack_with_dynamic_dims(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2x
// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C0_0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1_0:.*]] = arith.constant 1 : index
+
+/// Compute mask for xfer_read
// CHECK-DAG: %[[D0_0:.*]] = tensor.dim {{.*}} %[[C0_0]] : tensor<?x?xf32>
// CHECK-DAG: %[[D1_0:.*]] = tensor.dim {{.*}} %[[C1_0]] : tensor<?x?xf32>
// CHECK: %[[MASK:.*]] = vector.create_mask %[[D0_0]], %[[D1_0]] : vector<8x16xi1>
+
+/// --= read =---
// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[CST]]
// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32>
// CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32>
+
+/// --= shape_cast =---
// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<8x16xf32> to vector<4x2x1x16xf32>
+
+/// --= transpose =---
// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
+
+/// Compute mask for xfer_write
// CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x16x2xf32>
// CHECK-DAG: %[[D3:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x16x2xf32>
// CHECK: %[[MASK_0:.*]] = vector.create_mask %[[D2]], %[[D3]], %[[C16]], %[[C2]] : vector<4x1x16x2xi1>
+
+/// --= write =---
// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK_0]] {
// CHECK-SAME: vector.transfer_write %[[TR]], %[[DEST]][%[[C0_2]], %[[C0_2]], %[[C0_2]], %[[C0_2]]]
// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x16x2xf32>
+
// CHECK: return %[[WRITE]] : tensor<?x?x16x2xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [4, 1] : !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [4, 1, 16, 2] : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+/// Similar to the test above, but one of the inner tile sizes is dynamic. As a
+/// result, more output dims are dynamic and, e.g., the output mask calculation
+/// differs slightly.
+
+// CHECK-LABEL: func @pack_with_dynamic_dims_and_dynamic_inner_tile
+// CHECK-SAME: %[[SRC:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x?x2xf32>
+func.func @pack_with_dynamic_dims_and_dynamic_inner_tile(
+ %src: tensor<?x?xf32>,
+ %dest: tensor<?x?x?x2xf32>) -> tensor<?x?x?x2xf32> {
+ %c16 = arith.constant 16 : index
+ %pack = linalg.pack %src
+ inner_dims_pos = [1, 0]
+ inner_tiles = [%c16, 2]
+ into %dest : tensor<?x?xf32> -> tensor<?x?x?x2xf32>
+ return %pack : tensor<?x?x?x2xf32>
+}
+
+// CHECK-DAG: %[[CST:.*]] = ub.poison : f32
+// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C0_0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1_0:.*]] = arith.constant 1 : index
+
+/// Compute mask for xfer_read
+// CHECK-DAG: %[[D0_0:.*]] = tensor.dim {{.*}} %[[C0_0]] : tensor<?x?xf32>
+// CHECK-DAG: %[[D1_0:.*]] = tensor.dim {{.*}} %[[C1_0]] : tensor<?x?xf32>
+// CHECK: %[[MASK:.*]] = vector.create_mask %[[D0_0]], %[[D1_0]] : vector<8x16xi1>
+
+/// --= read =---
+// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] {
+// CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[CST]]
+// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32>
+// CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32>
+
+/// --= shape_cast =---
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<8x16xf32> to vector<4x2x1x16xf32>
+
+/// --= transpose =---
+// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
+
+/// Compute mask for xfer_write
+// CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C2_2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x?x2xf32>
+// CHECK-DAG: %[[D3:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x?x2xf32>
+// CHECK-DAG: %[[D4:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x?x2xf32>
+// CHECK: %[[MASK_0:.*]] = vector.create_mask %[[D2]], %[[D3]], %[[D4]], %[[C2_2]] : vector<4x1x16x2xi1>
+
+/// --= write =---
+// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK_0]] {
+// CHECK-SAME: vector.transfer_write %[[TR]], %[[DEST]][%[[C0_2]], %[[C0_2]], %[[C0_2]], %[[C0_2]]]
+// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x?x2xf32>
+
+// CHECK: return %[[WRITE]] : tensor<?x?x?x2xf32>
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [4, 1, 16, 2] : !transform.any_op
transform.yield
}
}
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
new file mode 100644
index 0000000..3035845
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics
+
+func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
+ %c32 = arith.constant 32 : index // expected-note {{target op}}
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // expected-error@below {{expected a xegpu.create_nd_desc op, but got: arith.constant}}
+ %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
new file mode 100644
index 0000000..23e1cd9
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -0,0 +1,58 @@
+// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: @set_desc_layout
+func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
+ // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
+ // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false>
+ // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>>
+ %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_desc_layout %{{.*}}
+ %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: @set_desc_layout_minimal
+func.func @set_desc_layout_minimal(%arg0: memref<4096x4096xf16>) {
+ // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
+ // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
+ %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_desc_layout %{{.*}}
+ %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: @set_desc_layout_param
+func.func @set_desc_layout_param(%arg0: memref<4096x4096xf16>) {
+ // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
+ // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>>
+ %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_desc_layout %{{.*}}
+ %layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
+ %1 = transform.xegpu.set_desc_layout %0 sg_layout = [%layout0, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op, !transform.param<i64>) -> !transform.any_op
+ transform.yield
+ }
+}
diff --git a/mlir/test/Transforms/remove-dead-values.mlir b/mlir/test/Transforms/remove-dead-values.mlir
index 8b5ccdc..e730450 100644
--- a/mlir/test/Transforms/remove-dead-values.mlir
+++ b/mlir/test/Transforms/remove-dead-values.mlir
@@ -674,18 +674,3 @@ func.func @dead_value_loop_ivs_no_result(%lb: index, %ub: index, %step: index, %
}
return
}
-
-// -----
-
-// CHECK-LABEL: func @op_block_have_dead_arg
-func.func @op_block_have_dead_arg(%arg0: index, %arg1: index, %arg2: index, %arg3: i1) {
- scf.for %iv = %arg0 to %arg1 step %arg2 {
- scf.execute_region {
- cf.cond_br %arg3, ^bb1(%arg0 : index), ^bb1(%arg1 : index)
- ^bb1(%0: index):
- scf.yield
- }
- }
-// CHECK-NEXT: return
- return
-}
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
new file mode 100644
index 0000000..1c8a2bc
--- /dev/null
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -0,0 +1,51 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+from mlir.ir import *
+from mlir.dialects import transform
+from mlir.dialects.transform import xegpu
+from mlir.dialects.transform import structured
+
+
+def run(f):
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ print("\nTEST:", f.__name__)
+ f()
+ print(module)
+ return f
+
+
+@run
+def setDescLayoutMinimal():
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.OperationType.get("xegpu.create_nd_tdesc"),
+ )
+ with InsertionPoint(sequence.body):
+ xegpu.SetDescLayoutOp(sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16])
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: setDescLayoutMinimal
+ # CHECK: %0 = transform.xegpu.set_desc_layout %
+ # CHECK: sg_layout = [6, 4]
+ # CHECK: sg_data = [32, 16]
+
+
+@run
+def setDescLayoutInstData():
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.OperationType.get("xegpu.create_nd_tdesc"),
+ )
+ with InsertionPoint(sequence.body):
+ xegpu.SetDescLayoutOp(
+ sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], inst_data=[8, 16]
+ )
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: setDescLayoutInstData
+ # CHECK: %0 = transform.xegpu.set_desc_layout %
+ # CHECK: sg_layout = [6, 4]
+ # CHECK: sg_data = [32, 16]
+ # CHECK: inst_data = [8, 16]
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b65fe64..ecd11b9 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5281,6 +5281,7 @@ libc_function(
hdrs = ["src/stdlib/strfromf.h"],
deps = [
":__support_common",
+ ":printf_error_mapper",
":str_from_util",
],
)
@@ -5291,6 +5292,7 @@ libc_function(
hdrs = ["src/stdlib/strfromd.h"],
deps = [
":__support_common",
+ ":printf_error_mapper",
":str_from_util",
],
)
@@ -5301,6 +5303,7 @@ libc_function(
hdrs = ["src/stdlib/strfroml.h"],
deps = [
":__support_common",
+ ":printf_error_mapper",
":str_from_util",
],
)
@@ -6514,12 +6517,34 @@ libc_support_library(
)
libc_support_library(
+ name = "printf_error_mapper",
+ hdrs = [
+ "src/stdio/printf_core/error_mapper.h",
+ ] + select({
+ "@platforms//os:linux": [
+ "src/stdio/printf_core/linux/error_mapper.h",
+ ],
+ "//conditions:default": [
+ "src/stdio/printf_core/generic/error_mapper.h",
+ ],
+ }),
+ deps = [
+ ":__support_cpp_type_traits",
+ ":__support_error_or",
+ ":__support_macros_properties_architectures",
+ ":hdr_errno_macros",
+ ":printf_core_structs",
+ ],
+)
+
+libc_support_library(
name = "printf_main",
hdrs = ["src/stdio/printf_core/printf_main.h"],
deps = [
":__support_arg_list",
":printf_converter",
":printf_core_structs",
+ ":printf_error_mapper",
":printf_parser",
":printf_writer",
],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel
index cbc6d13..e33199c 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel
@@ -87,6 +87,8 @@ libc_test(
name = "fprintf_test",
srcs = ["fprintf_test.cpp"],
deps = [
+ "//libc:__support_cpp_limits",
+ "//libc:__support_macros_properties_architectures",
"//libc:fprintf",
],
)