aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ci/generate_test_report_lib.py42
-rw-r--r--clang/docs/PointerAuthentication.rst30
-rw-r--r--clang/docs/ReleaseNotes.rst2
-rw-r--r--clang/lib/AST/ASTContext.cpp6
-rw-r--r--clang/lib/AST/ASTStructuralEquivalence.cpp13
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmt.cpp2
-rw-r--r--clang/lib/Headers/ptrauth.h14
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp12
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h1
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp9
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp8
-rw-r--r--clang/test/Analysis/Checkers/WebKit/mock-system-header.h6
-rw-r--r--clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm11
-rw-r--r--clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm38
-rw-r--r--clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm18
-rw-r--r--clang/test/C/C23/n3037.c46
-rw-r--r--clang/test/CIR/CodeGen/goto.cpp27
-rw-r--r--clang/test/CIR/CodeGen/label.c32
-rw-r--r--clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl70
-rw-r--r--clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl26
-rw-r--r--clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl (renamed from clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl)36
-rw-r--r--clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl94
-rw-r--r--clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl42
-rw-r--r--clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl26
-rw-r--r--compiler-rt/CMakeLists.txt2
-rw-r--r--compiler-rt/lib/asan/tests/CMakeLists.txt14
-rw-r--r--compiler-rt/lib/msan/msan.h1
-rw-r--r--compiler-rt/lib/msan/msan_allocator.cpp44
-rw-r--r--compiler-rt/lib/msan/msan_report.cpp4
-rw-r--r--compiler-rt/test/msan/allocator_padding.cpp94
-rw-r--r--compiler-rt/test/msan/zero_alloc.cpp11
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGen.cpp16
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp8
-rw-r--r--flang/test/Fir/CUDA/cuda-code-gen.mlir28
-rw-r--r--flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir24
-rw-r--r--lldb/cmake/modules/LLDBFramework.cmake2
-rw-r--r--lldb/include/lldb/lldb-enumerations.h2
-rw-r--r--lldb/packages/Python/lldbsuite/test/gdbclientutils.py70
-rw-r--r--lldb/source/Commands/CommandOptionsProcessLaunch.cpp4
-rw-r--r--lldb/source/Commands/Options.td5
-rw-r--r--lldb/source/Host/macosx/objcxx/Host.mm33
-rw-r--r--lldb/source/Host/windows/MainLoopWindows.cpp6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp84
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h7
-rw-r--r--lldb/test/API/macosx/mte/Makefile9
-rw-r--r--lldb/test/API/macosx/mte/TestDarwinMTE.py14
-rw-r--r--llvm/include/llvm/Analysis/StaticDataProfileInfo.h23
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h11
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h13
-rw-r--r--llvm/include/llvm/TableGen/CodeGenHelpers.h15
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp12
-rw-r--r--llvm/lib/Analysis/StaticDataProfileInfo.cpp47
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp72
-rw-r--r--llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp12
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp53
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp26
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp22
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp28
-rw-r--r--llvm/lib/Target/DirectX/DXContainerGlobals.cpp8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp45
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp4
-rw-r--r--llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll135
-rw-r--r--llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll28
-rw-r--r--llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll12
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/vselect.ll125
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/vselect.ll137
-rw-r--r--llvm/test/CodeGen/PowerPC/fmf-propagation.ll90
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll76
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir57
-rw-r--r--llvm/test/CodeGen/WebAssembly/bulk-memory.ll97
-rw-r--r--llvm/test/CodeGen/WebAssembly/bulk-memory64.ll91
-rw-r--r--llvm/test/CodeGen/X86/masked_gather_scatter.ll41
-rw-r--r--llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll4
-rw-r--r--llvm/test/TableGen/directive1.td1
-rw-r--r--llvm/test/TableGen/directive2.td1
-rw-r--r--llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll588
-rw-r--r--llvm/test/Transforms/SROA/slice-width.ll19
-rw-r--r--llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp4
-rw-r--r--llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp186
-rw-r--r--llvm/utils/TableGen/Basic/DirectiveEmitter.cpp6
-rw-r--r--llvm/utils/profcheck-xfail.txt8
-rw-r--r--mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp3
-rw-r--r--mlir/test/Dialect/Linalg/match-ops-interpreter.mlir14
-rw-r--r--orc-rt/include/orc-rt/ExecutorAddress.h3
-rw-r--r--orc-rt/unittests/ExecutorAddressTest.cpp6
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/BUILD.bazel64
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel6
88 files changed, 2696 insertions, 595 deletions
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py
index 5026c29..36c9585 100644
--- a/.ci/generate_test_report_lib.py
+++ b/.ci/generate_test_report_lib.py
@@ -98,6 +98,23 @@ def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]:
)
return output
+def get_failures(junit_objects) -> dict[str, list[tuple[str, str]]]:
+ failures = {}
+ for results in junit_objects:
+ for testsuite in results:
+ for test in testsuite:
+ if (
+ not test.is_passed
+ and test.result
+ and isinstance(test.result[0], Failure)
+ ):
+ if failures.get(testsuite.name) is None:
+ failures[testsuite.name] = []
+ failures[testsuite.name].append(
+ (test.classname + "/" + test.name, test.result[0].text)
+ )
+ return failures
+
# Set size_limit to limit the byte size of the report. The default is 1MB as this
# is the most that can be put into an annotation. If the generated report exceeds
@@ -113,7 +130,7 @@ def generate_report(
size_limit=1024 * 1024,
list_failures=True,
):
- failures = {}
+ failures = get_failures(junit_objects)
tests_run = 0
tests_skipped = 0
tests_failed = 0
@@ -124,18 +141,6 @@ def generate_report(
tests_skipped += testsuite.skipped
tests_failed += testsuite.failures
- for test in testsuite:
- if (
- not test.is_passed
- and test.result
- and isinstance(test.result[0], Failure)
- ):
- if failures.get(testsuite.name) is None:
- failures[testsuite.name] = []
- failures[testsuite.name].append(
- (test.classname + "/" + test.name, test.result[0].text)
- )
-
report = [f"# {title}", ""]
if tests_run == 0:
@@ -258,7 +263,7 @@ def generate_report(
return report
-def generate_report_from_files(title, return_code, build_log_files):
+def load_info_from_files(build_log_files):
junit_files = [
junit_file for junit_file in build_log_files if junit_file.endswith(".xml")
]
@@ -271,6 +276,9 @@ def generate_report_from_files(title, return_code, build_log_files):
ninja_logs.append(
[log_line.strip() for log_line in ninja_log_file_handle.readlines()]
)
- return generate_report(
- title, return_code, [JUnitXml.fromfile(p) for p in junit_files], ninja_logs
- )
+ return [JUnitXml.fromfile(p) for p in junit_files], ninja_logs
+
+
+def generate_report_from_files(title, return_code, build_log_files):
+ junit_objects, ninja_logs = load_info_from_files(build_log_files)
+ return generate_report(title, return_code, junit_objects, ninja_logs)
diff --git a/clang/docs/PointerAuthentication.rst b/clang/docs/PointerAuthentication.rst
index 96eb498..7e65f4b 100644
--- a/clang/docs/PointerAuthentication.rst
+++ b/clang/docs/PointerAuthentication.rst
@@ -592,6 +592,36 @@ The result value is never zero and always within range for both the
This can be used in constant expressions.
+``ptrauth_type_discriminator``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+ ptrauth_type_discriminator(type)
+
+Compute the constant discriminator derived from the given type, as is computed
+for automatically type diversified schemas.
+
+``type`` must be a type. The result has the type ``ptrauth_extra_data_t``.
+
+This can be used in constant expressions.
+
+``ptrauth_function_pointer_type_discriminator``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+ ptrauth_function_pointer_type_discriminator(function_type)
+
+Compute the constant discriminator derived from the provided function type, for
+use in contexts where the default function authentication schema is used. If
+function pointer type diversity is enabled, this is equivalent to
+``ptrauth_type_discriminator(function_type)``; if it is not enabled, this is ``0``.
+
+``function_type`` must be a function type. The result has the type ``ptrauth_extra_data_t``.
+
+This can be used in constant expressions.
+
``ptrauth_strip``
^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index edb872c..79ff821 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -191,6 +191,8 @@ C23 Feature Support
- Added ``FLT_SNAN``, ``DBL_SNAN``, and ``LDBL_SNAN`` to Clang's ``<float.h>``
header in C23 and later modes. This implements
`WG14 N2710 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2710.htm>`_.
+- Fixed a bug where unnamed tag types with the same fields were accepted as
+ compatible when they came from different types within the same translation unit.
Non-comprehensive list of changes in this release
-------------------------------------------------
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a8b41ba..3603e9cd 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11581,6 +11581,12 @@ QualType ASTContext::mergeTagDefinitions(QualType LHS, QualType RHS) {
if (LangOpts.CPlusPlus || !LangOpts.C23)
return {};
+ // Nameless tags are comparable only within outer definitions. At the top
+ // level they are not comparable.
+ const TagDecl *LTagD = LHS->castAsTagDecl(), *RTagD = RHS->castAsTagDecl();
+ if (!LTagD->getIdentifier() || !RTagD->getIdentifier())
+ return {};
+
// C23, on the other hand, requires the members to be "the same enough", so
// we use a structural equivalence check.
StructuralEquivalenceContext::NonEquivalentDeclSet NonEquivalentDecls;
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index 1557346..b17cd6f 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -1763,19 +1763,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
// another anonymous structure or union, respectively, if their members
// fulfill the preceding requirements. ... Otherwise, the structure, union,
// or enumerated types are incompatible.
-
- // Note: "the same tag" refers to the identifier for the structure; two
- // structures without names are not compatible within a TU. In C23, if either
- // declaration has no name, they're not equivalent. However, the paragraph
- // after the bulleted list goes on to talk about compatibility of anonymous
- // structure and union members, so this prohibition only applies to top-level
- // declarations; if either declaration is not a member, they cannot be
- // compatible.
- if (Context.LangOpts.C23 && (!D1->getIdentifier() || !D2->getIdentifier()) &&
- (!D1->getDeclContext()->isRecord() || !D2->getDeclContext()->isRecord()))
- return false;
-
- // Otherwise, check the names for equivalence.
if (!NameIsStructurallyEquivalent(*D1, *D2))
return false;
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index cfd48a2..5ba64dd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -536,7 +536,7 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) {
mlir::Block *currBlock = builder.getBlock();
mlir::Block *labelBlock = currBlock;
- if (!currBlock->empty()) {
+ if (!currBlock->empty() || currBlock->isEntryBlock()) {
{
mlir::OpBuilder::InsertionGuard guard(builder);
labelBlock = builder.createBlock(builder.getBlock()->getParent());
diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
index f902ca1..ad28f06 100644
--- a/clang/lib/Headers/ptrauth.h
+++ b/clang/lib/Headers/ptrauth.h
@@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
#define ptrauth_type_discriminator(__type) \
__builtin_ptrauth_type_discriminator(__type)
+/* Compute the constant discriminator used by Clang to sign pointers with the
+ given C function pointer type.
+
+ A call to this function is an integer constant expression. */
+#if __has_feature(ptrauth_function_pointer_type_discrimination)
+#define ptrauth_function_pointer_type_discriminator(__type) \
+ __builtin_ptrauth_type_discriminator(__type)
+#else
+#define ptrauth_function_pointer_type_discriminator(__type) \
+ ((ptrauth_extra_data_t)0)
+#endif
+
/* Compute a signature for the given pair of pointer-sized values.
The order of the arguments is significant.
@@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
})
#define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0)
+#define ptrauth_function_pointer_type_discriminator(__type) \
+ ((ptrauth_extra_data_t)0)
#define ptrauth_sign_generic_data(__value, __data) \
({ \
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index 66cfccb..c1a5000 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -26,6 +26,7 @@ bool tryToFindPtrOrigin(
const Expr *E, bool StopAtFirstRefCountedObj,
std::function<bool(const clang::CXXRecordDecl *)> isSafePtr,
std::function<bool(const clang::QualType)> isSafePtrType,
+ std::function<bool(const clang::Decl *)> isSafeGlobalDecl,
std::function<bool(const clang::Expr *, bool)> callback) {
while (E) {
if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
@@ -34,6 +35,8 @@ bool tryToFindPtrOrigin(
auto IsImmortal = safeGetName(VD) == "NSApp";
if (VD->hasGlobalStorage() && (IsImmortal || QT.isConstQualified()))
return callback(E, true);
+ if (VD->hasGlobalStorage() && isSafeGlobalDecl(VD))
+ return callback(E, true);
}
}
if (auto *tempExpr = dyn_cast<MaterializeTemporaryExpr>(E)) {
@@ -71,9 +74,11 @@ bool tryToFindPtrOrigin(
}
if (auto *Expr = dyn_cast<ConditionalOperator>(E)) {
return tryToFindPtrOrigin(Expr->getTrueExpr(), StopAtFirstRefCountedObj,
- isSafePtr, isSafePtrType, callback) &&
+ isSafePtr, isSafePtrType, isSafeGlobalDecl,
+ callback) &&
tryToFindPtrOrigin(Expr->getFalseExpr(), StopAtFirstRefCountedObj,
- isSafePtr, isSafePtrType, callback);
+ isSafePtr, isSafePtrType, isSafeGlobalDecl,
+ callback);
}
if (auto *cast = dyn_cast<CastExpr>(E)) {
if (StopAtFirstRefCountedObj) {
@@ -93,7 +98,8 @@ bool tryToFindPtrOrigin(
if (auto *call = dyn_cast<CallExpr>(E)) {
if (auto *Callee = call->getCalleeDecl()) {
if (Callee->hasAttr<CFReturnsRetainedAttr>() ||
- Callee->hasAttr<NSReturnsRetainedAttr>()) {
+ Callee->hasAttr<NSReturnsRetainedAttr>() ||
+ Callee->hasAttr<NSReturnsAutoreleasedAttr>()) {
return callback(E, true);
}
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
index 3a009d6..9fff456 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
@@ -56,6 +56,7 @@ bool tryToFindPtrOrigin(
const clang::Expr *E, bool StopAtFirstRefCountedObj,
std::function<bool(const clang::CXXRecordDecl *)> isSafePtr,
std::function<bool(const clang::QualType)> isSafePtrType,
+ std::function<bool(const clang::Decl *)> isSafeGlobalDecl,
std::function<bool(const clang::Expr *, bool)> callback);
/// For \p E referring to a ref-countable/-counted pointer/reference we return
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp
index 9585ceb..791e709 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp
@@ -29,12 +29,12 @@ namespace {
class RawPtrRefCallArgsChecker
: public Checker<check::ASTDecl<TranslationUnitDecl>> {
BugType Bug;
- mutable BugReporter *BR;
TrivialFunctionAnalysis TFA;
EnsureFunctionAnalysis EFA;
protected:
+ mutable BugReporter *BR;
mutable std::optional<RetainTypeChecker> RTC;
public:
@@ -46,6 +46,7 @@ public:
virtual bool isSafePtr(const CXXRecordDecl *Record) const = 0;
virtual bool isSafePtrType(const QualType type) const = 0;
virtual bool isSafeExpr(const Expr *) const { return false; }
+ virtual bool isSafeDecl(const Decl *) const { return false; }
virtual const char *ptrKind() const = 0;
void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
@@ -214,6 +215,7 @@ public:
Arg, /*StopAtFirstRefCountedObj=*/true,
[&](const clang::CXXRecordDecl *Record) { return isSafePtr(Record); },
[&](const clang::QualType T) { return isSafePtrType(T); },
+ [&](const clang::Decl *D) { return isSafeDecl(D); },
[&](const clang::Expr *ArgOrigin, bool IsSafe) {
if (IsSafe)
return true;
@@ -479,6 +481,11 @@ public:
isa<ObjCMessageExpr>(E);
}
+ bool isSafeDecl(const Decl *D) const final {
+ // Treat NS/CF globals in system header as immortal.
+ return BR->getSourceManager().isInSystemHeader(D->getLocation());
+ }
+
const char *ptrKind() const final { return "unretained"; }
};
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp
index dd9701f..c13df479 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp
@@ -166,10 +166,10 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded,
class RawPtrRefLocalVarsChecker
: public Checker<check::ASTDecl<TranslationUnitDecl>> {
BugType Bug;
- mutable BugReporter *BR;
EnsureFunctionAnalysis EFA;
protected:
+ mutable BugReporter *BR;
mutable std::optional<RetainTypeChecker> RTC;
public:
@@ -180,6 +180,7 @@ public:
virtual bool isSafePtr(const CXXRecordDecl *) const = 0;
virtual bool isSafePtrType(const QualType) const = 0;
virtual bool isSafeExpr(const Expr *) const { return false; }
+ virtual bool isSafeDecl(const Decl *) const { return false; }
virtual const char *ptrKind() const = 0;
void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
@@ -288,6 +289,7 @@ public:
return isSafePtr(Record);
},
[&](const clang::QualType Type) { return isSafePtrType(Type); },
+ [&](const clang::Decl *D) { return isSafeDecl(D); },
[&](const clang::Expr *InitArgOrigin, bool IsSafe) {
if (!InitArgOrigin || IsSafe)
return true;
@@ -443,6 +445,10 @@ public:
return ento::cocoa::isCocoaObjectRef(E->getType()) &&
isa<ObjCMessageExpr>(E);
}
+ bool isSafeDecl(const Decl *D) const final {
+ // Treat NS/CF globals in system header as immortal.
+ return BR->getSourceManager().isInSystemHeader(D->getLocation());
+ }
const char *ptrKind() const final { return "unretained"; }
};
diff --git a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
index 1e44de8..d55b3ab 100644
--- a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
+++ b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
@@ -34,6 +34,8 @@ void os_log_msg(os_log_t oslog, os_log_type_t type, const char *msg, ...);
typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStringRef;
+extern CFStringRef const kCFURLTagNamesKey;
+
#ifdef __OBJC__
@class NSString;
@interface SystemObject {
@@ -41,4 +43,8 @@ typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStrin
CFStringRef cf_string;
}
@end
+
+typedef NSString *NSNotificationName;
+extern "C" NSNotificationName NSApplicationDidBecomeActiveNotification;
+
#endif
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
index a517dbc..5dc3b38 100644
--- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
@@ -567,6 +567,17 @@ struct Derived : Base {
} // namespace ns_retained_return_value
+namespace autoreleased {
+
+NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased));
+void consume(NSString *);
+
+void foo() {
+ consume(provideAutoreleased());
+}
+
+} // autoreleased
+
@interface TestObject : NSObject
- (void)doWork:(NSString *)msg, ...;
- (void)doWorkOnSelf;
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm
index 307a4d03..f49e7bd 100644
--- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm
@@ -1,8 +1,11 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedLocalVarsChecker -verify %s
#import "objc-mock-types.h"
+#import "mock-system-header.h"
void someFunction();
+extern "C" CFStringRef LocalGlobalCFString;
+extern "C" NSString *LocalGlobalNSString;
namespace raw_ptr {
void foo() {
@@ -535,6 +538,41 @@ unsigned foo() {
} // namespace ns_retained_return_value
+namespace autoreleased {
+
+NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased));
+void consume(NSString *);
+
+void foo() {
+ auto *string = provideAutoreleased();
+ consume(string);
+}
+
+} // autoreleased
+
+namespace ns_global {
+
+void consumeCFString(CFStringRef);
+void consumeNSString(NSString *);
+
+void cf() {
+ auto *str = kCFURLTagNamesKey;
+ consumeCFString(str);
+ auto *localStr = LocalGlobalCFString;
+ // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}}
+ consumeCFString(localStr);
+}
+
+void ns() {
+ auto *str = NSApplicationDidBecomeActiveNotification;
+ consumeNSString(str);
+ auto *localStr = LocalGlobalNSString;
+ // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}}
+ consumeNSString(localStr);
+}
+
+}
+
bool doMoreWorkOpaque(OtherObj*);
SomeObj* provide();
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm
new file mode 100644
index 0000000..5c78b21
--- /dev/null
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm
@@ -0,0 +1,18 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedCallArgsChecker -verify %s
+
+#import "mock-types.h"
+#import "mock-system-header.h"
+
+void consumeCFString(CFStringRef);
+extern "C" CFStringRef LocalGlobalCFString;
+void consumeNSString(NSString *);
+extern "C" NSString *LocalGlobalNSString;
+
+void foo() {
+ consumeCFString(kCFURLTagNamesKey);
+ consumeCFString(LocalGlobalCFString);
+ // expected-warning@-1{{Call argument is unretained and unsafe}}
+ consumeNSString(NSApplicationDidBecomeActiveNotification);
+ consumeNSString(LocalGlobalNSString);
+ // expected-warning@-1{{Call argument is unretained and unsafe}}
+}
diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c
index 3748375..113ecf7 100644
--- a/clang/test/C/C23/n3037.c
+++ b/clang/test/C/C23/n3037.c
@@ -30,11 +30,24 @@ void func2(PRODUCT(int, SUM(float, double)) y) { // c17-warning {{declaration of
struct foop { struct { int x; }; }; // c17-note {{previous definition is here}}
struct foop { struct { int x; }; }; // c17-error {{redefinition of 'foop'}}
+// Test that field lookup compatibility isn't sufficient; the structure of the types should also be compatible.
+struct AnonymousStructNotMatchingFields { // c17-note {{previous definition is here}}
+ struct { // c23-note {{field has name '' here}}
+ int x;
+ };
+};
+struct AnonymousStructNotMatchingFields { // c23-error {{type 'struct AnonymousStructNotMatchingFields' has incompatible definitions}} \
+ c17-error {{redefinition of 'AnonymousStructNotMatchingFields'}}
+ int x; // c23-note {{field has name 'x' here}}
+};
+
union barp { int x; float y; }; // c17-note {{previous definition is here}}
union barp { int x; float y; }; // c17-error {{redefinition of 'barp'}}
typedef struct q { int x; } q_t; // c17-note 2 {{previous definition is here}}
typedef struct q { int x; } q_t; // c17-error {{redefinition of 'q'}} \
c17-error-re {{typedef redefinition with different types ('struct (unnamed struct at {{.*}})' vs 'struct q')}}
+typedef struct { int x; } untagged_q_t; // both-note {{previous definition is here}}
+typedef struct { int x; } untagged_q_t; // both-error {{typedef redefinition with different types}}
void func3(void) {
struct S { int x; }; // c17-note {{previous definition is here}}
struct T { struct S s; }; // c17-note {{previous definition is here}}
@@ -389,13 +402,40 @@ void nontag_both_in_params(struct { int i; } Arg1, struct { int i; } Arg2) {
_Static_assert(0 == _Generic(__typeof__(Arg1), __typeof__(Arg2) : 1, default : 0)); // both-warning {{passing a type argument as the first operand to '_Generic' is a C2y extension}}
}
-struct InnerAnonStruct {
+struct InnerUnnamedStruct {
struct {
int i;
} untagged;
-} inner_anon_tagged;
+} inner_unnamed_tagged;
+_Static_assert(0 == _Generic(inner_unnamed_tagged.untagged, struct { int i; } : 1, default : 0));
-_Static_assert(0 == _Generic(inner_anon_tagged.untagged, struct { int i; } : 1, default : 0));
+struct InnerUnnamedStruct_same {
+ struct {
+ int i;
+ } untagged;
+};
+struct InnerUnnamedStruct_differentNaming {
+ struct {
+ int i;
+ } untaggedDifferent;
+};
+struct InnerUnnamedStruct_differentShape {
+ float x;
+ struct {
+ int i;
+ } untagged;
+ int y;
+};
+void compare_unnamed_struct_from_different_outer_type(
+ struct InnerUnnamedStruct sameOuterType,
+ struct InnerUnnamedStruct_same matchingType,
+ struct InnerUnnamedStruct_differentNaming differentFieldName,
+ struct InnerUnnamedStruct_differentShape differentType) {
+ inner_unnamed_tagged.untagged = sameOuterType.untagged;
+ inner_unnamed_tagged.untagged = matchingType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}}
+ inner_unnamed_tagged.untagged = differentFieldName.untaggedDifferent; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}}
+ inner_unnamed_tagged.untagged = differentType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}}
+}
// Test the same thing with enumerations (test for unions is omitted because
// unions and structures are both RecordDecl objects, whereas EnumDecl is not).
diff --git a/clang/test/CIR/CodeGen/goto.cpp b/clang/test/CIR/CodeGen/goto.cpp
index 48cb44e..257c255 100644
--- a/clang/test/CIR/CodeGen/goto.cpp
+++ b/clang/test/CIR/CodeGen/goto.cpp
@@ -205,6 +205,8 @@ extern "C" void case_follow_label(int v) {
// CIR: cir.func dso_local @case_follow_label
// CIR: cir.switch
// CIR: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
// CIR: cir.label "label"
// CIR: cir.case(equal, [#cir.int<2> : !s32i]) {
// CIR: cir.call @action1()
@@ -215,9 +217,11 @@ extern "C" void case_follow_label(int v) {
// LLVM: define dso_local void @case_follow_label
// LLVM: switch i32 {{.*}}, label %[[SWDEFAULT:.*]] [
-// LLVM: i32 1, label %[[LABEL:.*]]
+// LLVM: i32 1, label %[[CASE1:.*]]
// LLVM: i32 2, label %[[CASE2:.*]]
// LLVM: ]
+// LLVM: [[CASE1]]:
+// LLVM: br label %[[LABEL:.*]]
// LLVM: [[LABEL]]:
// LLVM: br label %[[CASE2]]
// LLVM: [[CASE2]]:
@@ -303,3 +307,24 @@ extern "C" void default_follow_label(int v) {
// OGCG: br label %label
// OGCG: sw.epilog:
// OGCG: ret void
+
+void g3() {
+label:
+ goto label;
+}
+
+// CIR: cir.func dso_local @_Z2g3v
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
+// CIR: cir.label "label"
+// CIR: cir.goto "label"
+
+// LLVM: define dso_local void @_Z2g3v()
+// LLVM: br label %1
+// LLVM: 1:
+// LLVM: br label %1
+
+// OGCG: define dso_local void @_Z2g3v()
+// OGCG: br label %label
+// OGCG: label:
+// OGCG: br label %label
diff --git a/clang/test/CIR/CodeGen/label.c b/clang/test/CIR/CodeGen/label.c
index a050094..f5345ef 100644
--- a/clang/test/CIR/CodeGen/label.c
+++ b/clang/test/CIR/CodeGen/label.c
@@ -11,10 +11,14 @@ labelA:
}
// CIR: cir.func no_proto dso_local @label
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
// CIR: cir.label "labelA"
// CIR: cir.return
// LLVM:define dso_local void @label
+// LLVM: br label %1
+// LLVM: 1:
// LLVM: ret void
// OGCG: define dso_local void @label
@@ -29,15 +33,19 @@ labelC:
}
// CIR: cir.func no_proto dso_local @multiple_labels
-// CIR: cir.label "labelB"
// CIR: cir.br ^bb1
-// CIR: ^bb1: // pred: ^bb0
+// CIR: ^bb1:
+// CIR: cir.label "labelB"
+// CIR: cir.br ^bb2
+// CIR: ^bb2:
// CIR: cir.label "labelC"
// CIR: cir.return
// LLVM: define dso_local void @multiple_labels()
// LLVM: br label %1
// LLVM: 1:
+// LLVM: br label %2
+// LLVM: 2:
// LLVM: ret void
// OGCG: define dso_local void @multiple_labels
@@ -56,6 +64,8 @@ labelD:
// CIR: cir.func dso_local @label_in_if
// CIR: cir.if {{.*}} {
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
// CIR: cir.label "labelD"
// CIR: [[LOAD:%.*]] = cir.load align(4) [[COND:%.*]] : !cir.ptr<!s32i>, !s32i
// CIR: [[INC:%.*]] = cir.unary(inc, %3) nsw : !s32i, !s32i
@@ -68,15 +78,17 @@ labelD:
// LLVM: 3:
// LLVM: [[LOAD:%.*]] = load i32, ptr [[COND:%.*]], align 4
// LLVM: [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0
-// LLVM: br i1 [[CMP]], label %6, label %9
+// LLVM: br i1 [[CMP]], label %6, label %10
// LLVM: 6:
+// LLVM: br label %7
+// LLVM: 7:
// LLVM: [[LOAD2:%.*]] = load i32, ptr [[COND]], align 4
// LLVM: [[ADD1:%.*]] = add nsw i32 [[LOAD2]], 1
// LLVM: store i32 [[ADD1]], ptr [[COND]], align 4
-// LLVM: br label %9
-// LLVM: 9:
// LLVM: br label %10
// LLVM: 10:
+// LLVM: br label %11
+// LLVM: 11:
// LLVM: ret void
// OGCG: define dso_local void @label_in_if
@@ -142,11 +154,15 @@ end:
return;
}
// CIR: cir.func no_proto dso_local @labelWithoutMatch
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
// CIR: cir.label "end"
// CIR: cir.return
// CIR: }
// LLVM: define dso_local void @labelWithoutMatch
+// LLVM: br label %1
+// LLVM: 1:
// LLVM: ret void
// OGCG: define dso_local void @labelWithoutMatch
@@ -167,13 +183,17 @@ void foo() {
// CIR: cir.func no_proto dso_local @foo
// CIR: cir.scope {
-// CIR: cir.label "label"
// CIR: %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["agg.tmp0"]
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
+// CIR: cir.label "label"
// LLVM:define dso_local void @foo() {
// LLVM: [[ALLOC:%.*]] = alloca %struct.S, i64 1, align 1
// LLVM: br label %2
// LLVM:2:
+// LLVM: br label %3
+// LLVM:3:
// LLVM: [[CALL:%.*]] = call %struct.S @get()
// LLVM: store %struct.S [[CALL]], ptr [[ALLOC]], align 1
// LLVM: [[LOAD:%.*]] = load %struct.S, ptr [[ALLOC]], align 1
diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl
deleted file mode 100644
index f48521b..0000000
--- a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl
+++ /dev/null
@@ -1,70 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL
-// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPIRV
-
-// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", i16, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i16, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.2" = type { target("dx.TypedBuffer", i32, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.3" = type { target("dx.TypedBuffer", i64, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.4" = type { target("dx.TypedBuffer", i64, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.5" = type { target("dx.TypedBuffer", half, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.6" = type { target("dx.TypedBuffer", float, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.7" = type { target("dx.TypedBuffer", double, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.8" = type { target("dx.TypedBuffer", <4 x i16>, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.9" = type { target("dx.TypedBuffer", <3 x i32>, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.10" = type { target("dx.TypedBuffer", <2 x half>, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.11" = type { target("dx.TypedBuffer", <3 x float>, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.12" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 1) }
-
-// SPIRV: %"class.hlsl::RWBuffer" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) }
-// SPIRV: %"class.hlsl::RWBuffer.2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) }
-// SPIRV: %"class.hlsl::RWBuffer.3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) }
-// SPIRV: %"class.hlsl::RWBuffer.4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) }
-// SPIRV: %"class.hlsl::RWBuffer.5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) }
-// SPIRV: %"class.hlsl::RWBuffer.7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) }
-
-RWBuffer<int16_t> BufI16;
-RWBuffer<uint16_t> BufU16;
-RWBuffer<int> BufI32;
-RWBuffer<uint> BufU32;
-RWBuffer<int64_t> BufI64;
-RWBuffer<uint64_t> BufU64;
-RWBuffer<half> BufF16;
-RWBuffer<float> BufF32;
-RWBuffer<double> BufF64;
-RWBuffer< vector<int16_t, 4> > BufI16x4;
-RWBuffer< vector<uint, 3> > BufU32x3;
-RWBuffer<half2> BufF16x2;
-RWBuffer<float3> BufF32x3;
-RWBuffer<int4> BufI32x4;
-// TODO: RWBuffer<snorm half> BufSNormF16; -> 11
-// TODO: RWBuffer<unorm half> BufUNormF16; -> 12
-// TODO: RWBuffer<snorm float> BufSNormF32; -> 13
-// TODO: RWBuffer<unorm float> BufUNormF32; -> 14
-// TODO: RWBuffer<snorm double> BufSNormF64; -> 15
-// TODO: RWBuffer<unorm double> BufUNormF64; -> 16
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
- BufI16[GI] = 0;
- BufU16[GI] = 0;
- BufI32[GI] = 0;
- BufU32[GI] = 0;
- BufI64[GI] = 0;
- BufU64[GI] = 0;
- BufF16[GI] = 0;
- BufF32[GI] = 0;
- BufF64[GI] = 0;
- BufI16x4[GI] = 0;
- BufU32x3[GI] = 0;
- BufF16x2[GI] = 0;
- BufF32x3[GI] = 0;
-}
diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl
deleted file mode 100644
index 0de171c..0000000
--- a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXC,CHECK
-// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK
-
-RWBuffer<int> In;
-RWBuffer<int> Out;
-
-[numthreads(1,1,1)]
-void main(unsigned GI : SV_GroupIndex) {
- // CHECK: define void @main()
-
- // DXC: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
- // SPIRV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
- // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
- // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
- // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
- // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
- Out[GI] = In[GI];
-
- // DXC: %[[INPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
- // SPIRV: %[[INPTR:.*]] = call ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
- // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
- // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
- // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
- // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
- Out[GI] = In.Load(GI);
-}
diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl
index ca33c42..1ec9f0f 100644
--- a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | \
-// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
// FIXME: SPIR-V codegen of llvm.spv.resource.handlefrombinding and resource types is not yet implemented
// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | \
// llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
@@ -14,7 +14,7 @@
RWBuffer<float> Buf1 : register(u5, space3);
// Resource with implicit binding
-RWBuffer<double> Buf2;
+Buffer<double> Buf2;
export void foo() {
// Local resource declaration
@@ -22,12 +22,12 @@ export void foo() {
}
// CHECK: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", float, 1, 0, 0) }
-// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", double, 1, 0, 0) }
-// CHECK: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) }
+// CHECK: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", double, 0, 0, 0) }
+// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i32, 1, 0, 1) }
// CHECK: @Buf1 = internal global %"class.hlsl::RWBuffer" poison, align 4
// CHECK: @[[Buf1Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf1\00", align 1
-// CHECK: @Buf2 = internal global %"class.hlsl::RWBuffer.0" poison, align 4
+// CHECK: @Buf2 = internal global %"class.hlsl::Buffer" poison, align 4
// CHECK: @[[Buf2Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf2\00", align 1
// Buf1 initialization part 1 - global init function that calls RWBuffer<float>::__createFromBinding
@@ -50,24 +50,24 @@ export void foo() {
// Buf2 initialization part 1 - global init function that calls Buffer<double>::__createFromImplicitBinding
// CHECK: define internal void @__cxx_global_var_init.1()
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+// CHECK-NEXT: call void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
// CHECK-SAME: (ptr {{.*}} @Buf2, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 0, ptr noundef @[[Buf2Str]])
-// Buf2 initialization part 2 - body of RWBuffer<float>::__createFromImplicitBinding call
-// CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
-// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer.0") align 4 %[[RetValue2:.*]], i32 noundef %orderId,
+// Buf2 initialization part 2 - body of Buffer<double>::__createFromImplicitBinding call
+// CHECK: define linkonce_odr hidden void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::Buffer") align 4 %[[RetValue2:.*]], i32 noundef %orderId,
// CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, ptr noundef %name)
-// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::RWBuffer.0", align 4
-// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_1_0_0t(
-// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %[[Tmp2]], i32 0, i32 0
-// CHECK-DXIL: store target("dx.TypedBuffer", double, 1, 0, 0) %[[Handle2]], ptr %__handle, align 4
-// CHECK: call void @hlsl::RWBuffer<double>::RWBuffer(hlsl::RWBuffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]])
+// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::Buffer", align 4
+// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 0, 0, 0)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_0_0_0t(
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %[[Tmp2]], i32 0, i32 0
+// CHECK-DXIL: store target("dx.TypedBuffer", double, 0, 0, 0) %[[Handle2]], ptr %__handle, align 4
+// CHECK: call void @hlsl::Buffer<double>::Buffer(hlsl::Buffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]])
// Buf3 initialization part 1 - local variable declared in function foo() is initialized by RWBuffer<int> C1 default constructor
// CHECK: define void @foo()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.1", align 4
+// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.0", align 4
// CHECK-NEXT: call void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %Buf3)
// Buf3 initialization part 2 - body of RWBuffer<int> default C1 constructor that calls the default C2 constructor
@@ -76,11 +76,11 @@ export void foo() {
// Buf3 initialization part 3 - body of RWBuffer<int> default C2 constructor that initializes handle to poison
// CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %this)
-// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.1", ptr %{{.*}}, i32 0, i32 0
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %{{.*}}, i32 0, i32 0
// CHECK-NEXT: store target("dx.TypedBuffer", i32, 1, 0, 1) poison, ptr %__handle, align 4
// Module initialization
-// CHECK: define internal void @_GLOBAL__sub_I_RWBuffer_constructor.hlsl()
+// CHECK: define internal void @_GLOBAL__sub_I_TypedBuffers_constructor.hlsl()
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__cxx_global_var_init()
// CHECK-NEXT: call void @__cxx_global_var_init.1()
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl
new file mode 100644
index 0000000..d3dba8a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=0 -check-prefixes=DXIL
+
+// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=1 -check-prefixes=SPV-RO
+
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer -DRW=1 -check-prefixes=DXIL
+
+// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer --DRW=2 -check-prefixes=SPV-RW
+
+// DXIL: %"class.hlsl::[[RESOURCE]]" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.TypedBuffer", half, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.TypedBuffer", float, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.TypedBuffer", double, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.TypedBuffer", <4 x i16>, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.TypedBuffer", <3 x i32>, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.TypedBuffer", <2 x half>, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.TypedBuffer", <3 x float>, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.TypedBuffer", <4 x i32>, [[RW]], 0, 1) }
+
+// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) }
+
+// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) }
+
+RESOURCE<int16_t> BufI16;
+RESOURCE<uint16_t> BufU16;
+RESOURCE<int> BufI32;
+RESOURCE<uint> BufU32;
+RESOURCE<int64_t> BufI64;
+RESOURCE<uint64_t> BufU64;
+RESOURCE<half> BufF16;
+RESOURCE<float> BufF32;
+RESOURCE<double> BufF64;
+RESOURCE< vector<int16_t, 4> > BufI16x4;
+RESOURCE< vector<uint, 3> > BufU32x3;
+RESOURCE<half2> BufF16x2;
+RESOURCE<float3> BufF32x3;
+RESOURCE<int4> BufI32x4;
+// TODO: RESOURCE<snorm half> BufSNormF16; -> 11
+// TODO: RESOURCE<unorm half> BufUNormF16; -> 12
+// TODO: RESOURCE<snorm float> BufSNormF32; -> 13
+// TODO: RESOURCE<unorm float> BufUNormF32; -> 14
+// TODO: RESOURCE<snorm double> BufSNormF64; -> 15
+// TODO: RESOURCE<unorm double> BufUNormF64; -> 16
+
+[numthreads(1,1,1)]
+void main(int GI : SV_GroupIndex) {
+ int16_t v1 = BufI16[GI];
+ uint16_t v2 = BufU16[GI];
+ int v3 = BufI32[GI];
+ uint v4 = BufU32[GI];
+ int64_t v5 = BufI64[GI];
+ uint64_t v6 = BufU64[GI];
+ half v7 = BufF16[GI];
+ float v8 = BufF32[GI];
+ double v9 = BufF64[GI];
+ vector<int16_t,4> v10 = BufI16x4[GI];
+ vector<int, 3> v11 = BufU32x3[GI];
+ half2 v12 = BufF16x2[GI];
+ float3 v13 = BufF32x3[GI];
+}
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
new file mode 100644
index 0000000..b153bda
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL
+// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPIRV
+
+// NOTE: SPIRV codegen for resource methods is not yet implemented
+
+Buffer<float> Buf : register(t0);
+RWBuffer<uint4> RWBuf : register(u0);
+
+// DXIL: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", float, 0, 0, 0) }
+// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) }
+
+// DXIL: @Buf = internal global %"class.hlsl::Buffer" poison
+// DXIL: @RWBuf = internal global %"class.hlsl::RWBuffer" poison
+
+export float TestLoad() {
+ return Buf.Load(1) + RWBuf.Load(2).y;
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @TestLoad()()
+// CHECK: call {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} @Buf, i32 noundef 1)
+// CHECK: call {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} @RWBuf, i32 noundef 2)
+// CHECK: add
+// CHECK: ret float
+
+// CHECK: define {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", float, 0, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]]
+// CHECK-NEXT: ret float %[[VAL]]
+
+// CHECK: define {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VEC:.*]] = load <4 x i32>, ptr %[[PTR]]
+// CHECK-NEXT: ret <4 x i32> %[[VEC]]
+
+// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0), i32)
+// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), i32)
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl
new file mode 100644
index 0000000..adc35f6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXIL,CHECK
+// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK
+
+Buffer<int> In;
+RWBuffer<int> Out;
+
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+ // CHECK: define void @main()
+
+ // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_0_0_1t(target("dx.TypedBuffer", i32, 0, 0, 1) %{{.*}}, i32 %{{.*}})
+ // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_1_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) %{{.*}}, i32 %{{.*}})
+ // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
+ // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
+ // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
+ // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
+ Out[GI] = In[GI];
+
+ // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
+ // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
+ // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
+ // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
+ // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
+ // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
+ Out[GI + 1] = Out[GI];
+}
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 9f8e833..5931b60 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -83,6 +83,8 @@ mark_as_advanced(COMPILER_RT_BUILD_ORC)
option(COMPILER_RT_BUILD_GWP_ASAN "Build GWP-ASan, and link it into SCUDO" ON)
mark_as_advanced(COMPILER_RT_BUILD_GWP_ASAN)
option(COMPILER_RT_ENABLE_CET "Build Compiler RT with CET enabled" OFF)
+option(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME "Build asan unit tests without depending upon a just-built asan runtime" OFF)
+mark_as_advanced(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME)
option(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH "Set custom sysroot for building SCUDO standalone" OFF)
mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH)
diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt
index 9cd9c97..6d88c96 100644
--- a/compiler-rt/lib/asan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/asan/tests/CMakeLists.txt
@@ -170,11 +170,21 @@ function(add_asan_tests arch test_runtime)
set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
set(CONFIG_NAME_DYNAMIC ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig)
+ # On some platforms, unit tests can be run against the runtime that shipped
+ # with the host compiler with COMPILER_RT_TEST_STANDALONE_BUILD_LIBS=OFF.
+ # COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME=ON removes the dependency
+ # on `asan`, allowing the tests to be run independently without
+ # a newly built asan runtime.
+ set(ASAN_UNIT_TEST_DEPS asan)
+ if(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME)
+ set(ASAN_UNIT_TEST_DEPS)
+ endif()
+
# Closure to keep the values.
function(generate_asan_tests test_objects test_suite testname)
generate_compiler_rt_tests(${test_objects} ${test_suite} ${testname} ${arch}
COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_IGNORELIST_FILE}
- DEPS asan
+ DEPS ${ASAN_UNIT_TEST_DEPS}
KIND ${TEST_KIND}
${ARGN}
)
@@ -215,7 +225,7 @@ function(add_asan_tests arch test_runtime)
add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}"
SUBDIR "${CONFIG_NAME_DYNAMIC}"
OBJECTS ${ASAN_INST_TEST_OBJECTS}
- DEPS asan ${ASAN_INST_TEST_OBJECTS}
+ DEPS ${ASAN_UNIT_TEST_DEPS} ${ASAN_INST_TEST_OBJECTS}
LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS} ${DYNAMIC_LINK_FLAGS}
)
endif()
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 7fb58be..edb2699 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack);
const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1;
const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1;
+const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1;
#define GET_MALLOC_STACK_TRACE \
UNINITIALIZED BufferedStackTrace stack; \
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 64df863..80608aa 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -217,25 +217,52 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
}
auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
meta->requested_size = size;
+ uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
+ void* padding_start = reinterpret_cast<char*>(allocated) + size;
+ uptr padding_size = actually_allocated_size - size;
+
+ // - With calloc(7,1), we can set the ideal tagging:
+ // bytes 0-6: initialized, origin not set (and irrelevant)
+ // byte 7: uninitialized, origin TAG_ALLOC_PADDING
+ // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+ // - If we have malloc(7) and __msan_get_track_origins() > 1, the 4-byte
+ // origin granularity only allows the slightly suboptimal tagging:
+ // bytes 0-6: uninitialized, origin TAG_ALLOC
+ // byte 7: uninitialized, origin TAG_ALLOC (suboptimal)
+ // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+ // - If we have malloc(7) and __msan_get_track_origins() == 1, we use a
+ // single origin to reduce overhead:
+ // bytes 0-6: uninitialized, origin TAG_ALLOC
+ // byte 7: uninitialized, origin TAG_ALLOC (suboptimal)
+ // bytes 8-15: uninitialized, origin TAG_ALLOC (suboptimal)
+ if (__msan_get_track_origins() && flags()->poison_in_malloc &&
+ (zero || (__msan_get_track_origins() > 1))) {
+ stack->tag = STACK_TRACE_TAG_ALLOC_PADDING;
+ Origin o2 = Origin::CreateHeapOrigin(stack);
+ __msan_set_origin(padding_start, padding_size, o2.raw_id());
+ }
+
if (zero) {
if (allocator.FromPrimary(allocated))
__msan_clear_and_unpoison(allocated, size);
else
__msan_unpoison(allocated, size); // Mem is already zeroed.
+
+ if (flags()->poison_in_malloc)
+ __msan_poison(padding_start, padding_size);
} else if (flags()->poison_in_malloc) {
- __msan_poison(allocated, size);
+ __msan_poison(allocated, actually_allocated_size);
+
if (__msan_get_track_origins()) {
stack->tag = StackTrace::TAG_ALLOC;
Origin o = Origin::CreateHeapOrigin(stack);
- __msan_set_origin(allocated, size, o.raw_id());
+ __msan_set_origin(
+ allocated,
+ __msan_get_track_origins() == 1 ? actually_allocated_size : size,
+ o.raw_id());
}
}
- uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
- // For compatibility, the allocator converted 0-sized allocations into 1 byte
- if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc)
- __msan_poison(allocated, 1);
-
UnpoisonParam(2);
RunMallocHooks(allocated, size);
return allocated;
@@ -255,9 +282,10 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) {
if (flags()->poison_in_free && allocator.FromPrimary(p)) {
__msan_poison(p, size);
if (__msan_get_track_origins()) {
+ uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(p);
stack->tag = StackTrace::TAG_DEALLOC;
Origin o = Origin::CreateHeapOrigin(stack);
- __msan_set_origin(p, size, o.raw_id());
+ __msan_set_origin(p, actually_allocated_size, o.raw_id());
}
}
if (MsanThread *t = GetCurrentThread()) {
diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp
index 99bf81f..cd0bf67 100644
--- a/compiler-rt/lib/msan/msan_report.cpp
+++ b/compiler-rt/lib/msan/msan_report.cpp
@@ -90,6 +90,10 @@ static void DescribeOrigin(u32 id) {
Printf(" %sVirtual table ptr was destroyed%s\n", d.Origin(),
d.Default());
break;
+ case STACK_TRACE_TAG_ALLOC_PADDING:
+ Printf(" %sUninitialized value is outside of heap allocation%s\n",
+ d.Origin(), d.Default());
+ break;
default:
Printf(" %sUninitialized value was created%s\n", d.Origin(),
d.Default());
diff --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp
new file mode 100644
index 0000000..72acf31
--- /dev/null
+++ b/compiler-rt/test/msan/allocator_padding.cpp
@@ -0,0 +1,94 @@
+// *** malloc: all bytes are uninitialized
+// * malloc byte 0
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 0 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// * malloc byte 6
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 6 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes
+// 7-15 are padding.
+//
+// * malloc byte 7
+// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING,
+// ALLOC always takes precedence.
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// Bytes 8-15 are padding
+// For track-origins=1, ALLOC is used instead of ALLOC_PADDING.
+//
+// * malloc byte 8
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * malloc byte 15
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+// *** calloc
+// Bytes 0-6 are fully initialized, so no MSan report should happen.
+//
+// * calloc byte 0
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+//
+// * calloc byte 6
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+//
+// * calloc byte 7
+// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING
+// (since the origin does not need to track bytes 4-6).
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * calloc byte 8
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * calloc byte 15
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+#ifdef USE_CALLOC
+ char *p = (char *)calloc(7, 1);
+#else
+ char *p = (char *)malloc(7);
+#endif
+
+ if (argc == 2) {
+ int index = atoi(argv[1]);
+
+ printf("p[%d] = %d\n", index, p[index]);
+ // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
+ // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]]
+ // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation
+ // ORIGIN-ALLOC-PADDING: Uninitialized value is outside of heap allocation
+ free(p);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp
index 1451e1e..f4cf1d8 100644
--- a/compiler-rt/test/msan/zero_alloc.cpp
+++ b/compiler-rt/test/msan/zero_alloc.cpp
@@ -1,4 +1,9 @@
-// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,DISCOUNT
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGINS
#include <stdio.h>
#include <stdlib.h>
@@ -10,6 +15,7 @@ int main(int argc, char **argv) {
printf("Content of p1 is: %d\n", *p1);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation
free(p1);
}
@@ -19,6 +25,7 @@ int main(int argc, char **argv) {
printf("Content of p2 is: %d\n", *p2);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation
free(p2);
}
@@ -28,6 +35,8 @@ int main(int argc, char **argv) {
printf("Content of p2 is: %d\n", *p3);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // DISCOUNT: Uninitialized value was created by a heap allocation
+ // ORIGINS: Uninitialized value is outside of heap allocation
free(p3);
}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 0afb295..70bb43a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
+
+ if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) {
+ auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ replaceWithAddrOfOrASCast(
+ rewriter, addr->getLoc(),
+ global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global ? global.getSymName()
+ : addr.getSymbol().getRootReference().getValue(),
+ convertType(addr.getType()), addr);
+ return mlir::success();
+ }
+
auto global = addr->getParentOfType<mlir::ModuleOp>()
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
replaceWithAddrOfOrASCast(
@@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
if (global.getDataAttr() &&
*global.getDataAttr() == cuf::DataAttribute::Constant)
- TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation");
+ g.setAddrSpace(
+ static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant));
rewriter.eraseOp(global);
return mlir::success();
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 41383fb..9bf10b5 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -353,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) {
// calculation op.
mlir::Value baseRef =
llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op)
+ .Case<fir::DeclareOp>([&](auto op) {
+ // If this declare binds a view with an underlying storage operand,
+ // treat that storage as the base reference. Otherwise, fall back
+ // to the declared memref.
+ if (auto storage = op.getStorage())
+ return storage;
+ return mlir::Value(varPtr);
+ })
.Case<hlfir::DesignateOp>([&](auto op) {
// Get the base object.
return op.getMemref();
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index bbd3f9f..60cda9e 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
// CHECK-LABEL: llvm.func @_QQxxx()
// CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
// CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor
+
+// -----
+
+module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+ gpu.module @cuda_device_mod {
+ fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 {
+ %0 = fir.zero_bits i32
+ fir.has_value %0 : i32
+ }
+ gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel {
+ %c-1 = arith.constant -1 : index
+ %c1_i32 = arith.constant 1 : i32
+ %0 = arith.constant 1 : i32
+ %1 = arith.addi %0, %c1_i32 : i32
+ %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32>
+ %4 = fir.load %2 : !fir.ref<i32>
+ %5 = fir.convert %1 : (i32) -> i64
+ %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32>
+ fir.store %4 to %6 : !fir.ref<i32>
+ gpu.return
+ }
+ }
+}
+
+// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32
+// CHECK-LABEL: gpu.func @_QMkernelsPassign
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4>
+// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr
diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir
new file mode 100644
index 0000000..fabfe4c
--- /dev/null
+++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir
@@ -0,0 +1,24 @@
+// Use --mlir-disable-threading so that the diagnostic printing is serialized.
+// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
+
+module {
+ // Build a scalar view via fir.declare with a storage operand into an array of i8
+ func.func @_QPdeclare_with_storage_is_nonscalar() {
+ %c0 = arith.constant 0 : index
+ %arr = fir.alloca !fir.array<4xi8>
+ %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8>
+ %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32>
+ %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"}
+ : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32>
+ // Force interface query through an acc op that prints type category
+ %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false}
+ acc.enter_data dataOperands(%cp : !fir.ref<f32>)
+ return
+ }
+
+ // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false}
+ // CHECK: Pointer-like and Mappable: !fir.ref<f32>
+ // CHECK: Type category: array
+}
+
+
diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake
index c6f00ed..23d9d49 100644
--- a/lldb/cmake/modules/LLDBFramework.cmake
+++ b/lldb/cmake/modules/LLDBFramework.cmake
@@ -68,8 +68,6 @@ if(NOT APPLE_EMBEDDED)
)
endif()
-find_program(unifdef_EXECUTABLE unifdef)
-
# Wrap output in a target, so lldb-framework can depend on it.
add_custom_target(liblldb-resource-headers DEPENDS lldb-sbapi-dwarf-enums ${lldb_staged_headers})
set_target_properties(liblldb-resource-headers PROPERTIES FOLDER "LLDB/Resources")
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index fec9fde..1a7db8f 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -130,6 +130,8 @@ FLAGS_ENUM(LaunchFlags){
eLaunchFlagInheritTCCFromParent =
(1u << 12), ///< Don't make the inferior responsible for its own TCC
///< permissions but instead inherit them from its parent.
+ eLaunchFlagMemoryTagging =
+ (1u << 13), ///< Launch process with memory tagging explicitly enabled.
};
/// Thread Run Modes.
diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
index 53e991a..1a2860a 100644
--- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
+++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
import ctypes
import errno
import io
@@ -5,6 +6,7 @@ import threading
import socket
import traceback
from lldbsuite.support import seven
+from typing import Optional, List, Tuple
def checksum(message):
@@ -86,7 +88,7 @@ class MockGDBServerResponder:
handles any packet not recognized in the common packet handling code.
"""
- registerCount = 40
+ registerCount: int = 40
class RESPONSE_DISCONNECT:
pass
@@ -95,7 +97,7 @@ class MockGDBServerResponder:
pass
def __init__(self):
- self.packetLog = []
+ self.packetLog: List[str] = []
def respond(self, packet):
"""
@@ -241,7 +243,7 @@ class MockGDBServerResponder:
def qHostInfo(self):
return "ptrsize:8;endian:little;"
- def qEcho(self):
+ def qEcho(self, num: int):
return "E04"
def qQueryGDBServer(self):
@@ -262,10 +264,10 @@ class MockGDBServerResponder:
def D(self, packet):
return "OK"
- def readRegisters(self):
+ def readRegisters(self) -> str:
return "00000000" * self.registerCount
- def readRegister(self, register):
+ def readRegister(self, register: int) -> str:
return "00000000"
def writeRegisters(self, registers_hex):
@@ -305,7 +307,9 @@ class MockGDBServerResponder:
# SIGINT is 2, return type is 2 digit hex string
return "S02"
- def qXferRead(self, obj, annex, offset, length):
+ def qXferRead(
+ self, obj: str, annex: str, offset: int, length: int
+ ) -> Tuple[Optional[str], bool]:
return None, False
def _qXferResponse(self, data, has_more):
@@ -373,15 +377,17 @@ class MockGDBServerResponder:
pass
-class ServerChannel:
+class ServerChannel(ABC):
"""
A wrapper class for TCP or pty-based server.
"""
- def get_connect_address(self):
+ @abstractmethod
+ def get_connect_address(self) -> str:
"""Get address for the client to connect to."""
- def get_connect_url(self):
+ @abstractmethod
+ def get_connect_url(self) -> str:
"""Get URL suitable for process connect command."""
def close_server(self):
@@ -393,10 +399,12 @@ class ServerChannel:
def close_connection(self):
"""Close all resources used by the accepted connection."""
- def recv(self):
+ @abstractmethod
+ def recv(self) -> bytes:
"""Receive a data packet from the connected client."""
- def sendall(self, data):
+ @abstractmethod
+ def sendall(self, data: bytes) -> None:
"""Send the data to the connected client."""
@@ -427,11 +435,11 @@ class ServerSocket(ServerChannel):
self._connection.close()
self._connection = None
- def recv(self):
+ def recv(self) -> bytes:
assert self._connection is not None
return self._connection.recv(4096)
- def sendall(self, data):
+ def sendall(self, data: bytes) -> None:
assert self._connection is not None
return self._connection.sendall(data)
@@ -443,10 +451,10 @@ class TCPServerSocket(ServerSocket):
)[0]
super().__init__(family, type, proto, addr)
- def get_connect_address(self):
+ def get_connect_address(self) -> str:
return "[{}]:{}".format(*self._server_socket.getsockname())
- def get_connect_url(self):
+ def get_connect_url(self) -> str:
return "connect://" + self.get_connect_address()
@@ -454,10 +462,10 @@ class UnixServerSocket(ServerSocket):
def __init__(self, addr):
super().__init__(socket.AF_UNIX, socket.SOCK_STREAM, 0, addr)
- def get_connect_address(self):
+ def get_connect_address(self) -> str:
return self._server_socket.getsockname()
- def get_connect_url(self):
+ def get_connect_url(self) -> str:
return "unix-connect://" + self.get_connect_address()
@@ -471,7 +479,7 @@ class PtyServerSocket(ServerChannel):
self._primary = io.FileIO(primary, "r+b")
self._secondary = io.FileIO(secondary, "r+b")
- def get_connect_address(self):
+ def get_connect_address(self) -> str:
libc = ctypes.CDLL(None)
libc.ptsname.argtypes = (ctypes.c_int,)
libc.ptsname.restype = ctypes.c_char_p
@@ -484,7 +492,7 @@ class PtyServerSocket(ServerChannel):
self._secondary.close()
self._primary.close()
- def recv(self):
+ def recv(self) -> bytes:
try:
return self._primary.read(4096)
except OSError as e:
@@ -493,8 +501,8 @@ class PtyServerSocket(ServerChannel):
return b""
raise
- def sendall(self, data):
- return self._primary.write(data)
+ def sendall(self, data: bytes) -> None:
+ self._primary.write(data)
class MockGDBServer:
@@ -527,18 +535,21 @@ class MockGDBServer:
self._thread.join()
self._thread = None
- def get_connect_address(self):
+ def get_connect_address(self) -> str:
+ assert self._socket is not None
return self._socket.get_connect_address()
- def get_connect_url(self):
+ def get_connect_url(self) -> str:
+ assert self._socket is not None
return self._socket.get_connect_url()
def run(self):
+ assert self._socket is not None
# For testing purposes, we only need to worry about one client
# connecting just one time.
try:
self._socket.accept()
- except:
+ except Exception:
traceback.print_exc()
return
self._shouldSendAck = True
@@ -553,7 +564,7 @@ class MockGDBServer:
self._receive(data)
except self.TerminateConnectionException:
pass
- except Exception as e:
+ except Exception:
print(
"An exception happened when receiving the response from the gdb server. Closing the client..."
)
@@ -586,7 +597,9 @@ class MockGDBServer:
Once a complete packet is found at the front of self._receivedData,
its data is removed form self._receivedData.
"""
+ assert self._receivedData is not None
data = self._receivedData
+ assert self._receivedDataOffset is not None
i = self._receivedDataOffset
data_len = len(data)
if data_len == 0:
@@ -639,10 +652,13 @@ class MockGDBServer:
self._receivedDataOffset = 0
return packet
- def _sendPacket(self, packet):
- self._socket.sendall(seven.bitcast_to_bytes(frame_packet(packet)))
+ def _sendPacket(self, packet: str):
+ assert self._socket is not None
+ framed_packet = seven.bitcast_to_bytes(frame_packet(packet))
+ self._socket.sendall(framed_packet)
def _handlePacket(self, packet):
+ assert self._socket is not None
if packet is self.PACKET_ACK:
# Ignore ACKs from the client. For the future, we can consider
# adding validation code to make sure the client only sends ACKs
diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp
index 21d94d6..8ae20bd 100644
--- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp
+++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp
@@ -127,6 +127,10 @@ Status CommandOptionsProcessLaunch::SetOptionValue(
break;
}
+ case 'M':
+ launch_info.GetFlags().Set(eLaunchFlagMemoryTagging);
+ break;
+
case 'c':
if (!option_arg.empty())
launch_info.SetShell(FileSpec(option_arg));
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 595b3d0..a9f054e 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -1173,6 +1173,11 @@ let Command = "process launch" in {
Arg<"Boolean">,
Desc<"Set whether to shell expand arguments to the process when "
"launching.">;
+ def process_launch_memory_tagging
+ : Option<"memory-tagging", "M">,
+ Desc<"Set whether to explicitly enable memory tagging when launching "
+ "the process. Requires hardware support. "
+ "(Only supported on Darwin.)">;
}
let Command = "process attach" in {
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index 3c1d117..7120892 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -1210,6 +1210,39 @@ static Status LaunchProcessPosixSpawn(const char *exe_path,
}
}
+ if (launch_info.GetFlags().Test(eLaunchFlagMemoryTagging)) {
+ // The following function configures the spawn attributes to launch the
+ // process with memory tagging explicitly enabled. We look it up
+ // dynamically since it is only available on newer OS. Does nothing on
+ // hardware which does not support MTE.
+ //
+ // int posix_spawnattr_set_use_sec_transition_shims_np(
+ // posix_spawnattr_t *attr, uint32_t flags);
+ //
+ using posix_spawnattr_set_use_sec_transition_shims_np_t =
+ int (*)(posix_spawnattr_t *attr, uint32_t flags);
+ auto posix_spawnattr_set_use_sec_transition_shims_np_fn =
+ (posix_spawnattr_set_use_sec_transition_shims_np_t)dlsym(
+ RTLD_DEFAULT, "posix_spawnattr_set_use_sec_transition_shims_np");
+ if (posix_spawnattr_set_use_sec_transition_shims_np_fn) {
+ error =
+ Status(posix_spawnattr_set_use_sec_transition_shims_np_fn(&attr, 0),
+ eErrorTypePOSIX);
+ if (error.Fail()) {
+ LLDB_LOG(log,
+ "error: {0}, "
+ "posix_spawnattr_set_use_sec_transition_shims_np(&attr, 0)",
+ error);
+ return error;
+ }
+ } else {
+ LLDB_LOG(log,
+ "error: posix_spawnattr_set_use_sec_transition_shims_np not "
+ "available",
+ error);
+ }
+ }
+
// Don't set the binpref if a shell was provided. After all, that's only
// going to affect what version of the shell is launched, not what fork of
// the binary is launched. We insert "arch --arch <ARCH> as part of the
diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp
index c0b1079..9b7df10 100644
--- a/lldb/source/Host/windows/MainLoopWindows.cpp
+++ b/lldb/source/Host/windows/MainLoopWindows.cpp
@@ -55,11 +55,7 @@ public:
if (m_monitor_thread.joinable()) {
m_stopped = true;
SetEvent(m_ready);
- // Keep trying to cancel ReadFile() until the thread exits.
- do {
- CancelIoEx(m_handle, /*lpOverlapped=*/NULL);
- } while (WaitForSingleObject(m_monitor_thread.native_handle(), 1) ==
- WAIT_TIMEOUT);
+ CancelIoEx(m_handle, /*lpOverlapped=*/NULL);
m_monitor_thread.join();
}
CloseHandle(m_event);
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
index cc0c9e7..6d8f41a 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
@@ -14,6 +14,7 @@
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Log.h"
#include "lldb/lldb-enumerations.h"
+#include "llvm/ADT/Sequence.h"
using namespace lldb;
using namespace lldb_private;
@@ -266,22 +267,47 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process,
return true;
}
-bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr,
- lldb::addr_t relative_selector_base_addr,
- bool is_small, bool has_direct_sel) {
- size_t ptr_size = process->GetAddressByteSize();
- size_t size = GetSize(process, is_small);
+llvm::SmallVector<ClassDescriptorV2::method_t, 0>
+ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
+ lldb::addr_t relative_selector_base_addr,
+ bool is_small, bool has_direct_sel) const {
+ lldb_private::Process *process = m_runtime.GetProcess();
+ if (!process)
+ return {};
- DataBufferHeap buffer(size, '\0');
- Status error;
+ const size_t size = method_t::GetSize(process, is_small);
+ const size_t num_methods = addresses.size();
- process->ReadMemory(addr, buffer.GetBytes(), size, error);
- if (error.Fail()) {
- return false;
+ llvm::SmallVector<uint8_t, 0> buffer(num_methods * size, 0);
+ llvm::DenseSet<uint32_t> failed_indices;
+
+ for (auto [idx, addr] : llvm::enumerate(addresses)) {
+ Status error;
+ process->ReadMemory(addr, buffer.data() + idx * size, size, error);
+ if (error.Fail())
+ failed_indices.insert(idx);
}
- DataExtractor extractor(buffer.GetBytes(), size, process->GetByteOrder(),
- ptr_size);
+ llvm::SmallVector<method_t, 0> methods;
+ methods.reserve(num_methods);
+ for (auto [idx, addr] : llvm::enumerate(addresses)) {
+ if (failed_indices.contains(idx))
+ continue;
+ DataExtractor extractor(buffer.data() + idx * size, size,
+ process->GetByteOrder(),
+ process->GetAddressByteSize());
+ methods.push_back(method_t());
+ methods.back().Read(extractor, process, addr, relative_selector_base_addr,
+ is_small, has_direct_sel);
+ }
+
+ return methods;
+}
+
+bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor,
+ Process *process, lldb::addr_t addr,
+ lldb::addr_t relative_selector_base_addr,
+ bool is_small, bool has_direct_sel) {
lldb::offset_t cursor = 0;
if (is_small) {
@@ -291,11 +317,11 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr,
m_name_ptr = addr + nameref_offset;
+ Status error;
if (!has_direct_sel) {
// The SEL offset points to a SELRef. We need to dereference twice.
- m_name_ptr = process->ReadUnsignedIntegerFromMemory(m_name_ptr, ptr_size,
- 0, error);
- if (!error.Success())
+ m_name_ptr = process->ReadPointerFromMemory(m_name_ptr, error);
+ if (error.Fail())
return false;
} else if (relative_selector_base_addr != LLDB_INVALID_ADDRESS) {
m_name_ptr = relative_selector_base_addr + nameref_offset;
@@ -308,13 +334,13 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr,
m_imp_ptr = extractor.GetAddress_unchecked(&cursor);
}
+ Status error;
process->ReadCStringFromMemory(m_name_ptr, m_name, error);
- if (error.Fail()) {
+ if (error.Fail())
return false;
- }
process->ReadCStringFromMemory(m_types_ptr, m_types, error);
- return !error.Fail();
+ return error.Success();
}
bool ClassDescriptorV2::ivar_list_t::Read(Process *process, lldb::addr_t addr) {
@@ -447,17 +473,19 @@ ClassDescriptorV2::GetMethodList(Process *process,
bool ClassDescriptorV2::ProcessMethodList(
std::function<bool(const char *, const char *)> const &instance_method_func,
ClassDescriptorV2::method_list_t &method_list) const {
- lldb_private::Process *process = m_runtime.GetProcess();
- auto method = std::make_unique<method_t>();
- lldb::addr_t relative_selector_base_addr =
- m_runtime.GetRelativeSelectorBaseAddr();
- for (uint32_t i = 0, e = method_list.m_count; i < e; ++i) {
- method->Read(process, method_list.m_first_ptr + (i * method_list.m_entsize),
- relative_selector_base_addr, method_list.m_is_small,
- method_list.m_has_direct_selector);
- if (instance_method_func(method->m_name.c_str(), method->m_types.c_str()))
+ auto idx_to_method_addr = [&](uint32_t idx) {
+ return method_list.m_first_ptr + (idx * method_list.m_entsize);
+ };
+ llvm::SmallVector<addr_t> addresses = llvm::to_vector(llvm::map_range(
+ llvm::seq<uint32_t>(method_list.m_count), idx_to_method_addr));
+
+ llvm::SmallVector<method_t, 0> methods =
+ ReadMethods(addresses, m_runtime.GetRelativeSelectorBaseAddr(),
+ method_list.m_is_small, method_list.m_has_direct_selector);
+
+ for (const auto &method : methods)
+ if (instance_method_func(method.m_name.c_str(), method.m_types.c_str()))
break;
- }
return true;
}
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
index 920a5eb..78b3311 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
@@ -172,11 +172,16 @@ private:
+ field_size; // IMP imp;
}
- bool Read(Process *process, lldb::addr_t addr,
+ bool Read(DataExtractor &extractor, Process *process, lldb::addr_t addr,
lldb::addr_t relative_selector_base_addr, bool is_small,
bool has_direct_sel);
};
+ llvm::SmallVector<method_t, 0>
+ ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
+ lldb::addr_t relative_selector_base_addr, bool is_small,
+ bool has_direct_sel) const;
+
struct ivar_list_t {
uint32_t m_entsize;
uint32_t m_count;
diff --git a/lldb/test/API/macosx/mte/Makefile b/lldb/test/API/macosx/mte/Makefile
index cb20942..d614e0f 100644
--- a/lldb/test/API/macosx/mte/Makefile
+++ b/lldb/test/API/macosx/mte/Makefile
@@ -1,12 +1,15 @@
C_SOURCES := main.c
-EXE := uaf_mte
+EXE := uaf
-all: uaf_mte sign
+binary-plain: uaf
+binary-entitled: uaf sign
+
+all: binary-entitled
include Makefile.rules
-sign: mte-entitlements.plist uaf_mte
+sign: mte-entitlements.plist uaf
ifeq ($(OS),Darwin)
codesign -s - -f --entitlements $^
endif
diff --git a/lldb/test/API/macosx/mte/TestDarwinMTE.py b/lldb/test/API/macosx/mte/TestDarwinMTE.py
index 489e24a..a70b4b4 100644
--- a/lldb/test/API/macosx/mte/TestDarwinMTE.py
+++ b/lldb/test/API/macosx/mte/TestDarwinMTE.py
@@ -7,13 +7,25 @@ from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil
import lldbsuite.test.cpu_feature as cpu_feature
-exe_name = "uaf_mte" # Must match Makefile
+exe_name = "uaf" # Must match Makefile
class TestDarwinMTE(TestBase):
NO_DEBUG_INFO_TESTCASE = True
@skipUnlessFeature(cpu_feature.AArch64.MTE)
+ def test_process_launch_memory_tagging(self):
+ self.build(make_targets=["binary-plain"])
+ self.createTestTarget(self.getBuildArtifact(exe_name))
+
+ self.expect("process launch", substrs=["exited with status = 0"])
+
+ self.expect(
+ "process launch --memory-tagging",
+ substrs=["stopped", "stop reason = EXC_ARM_MTE_TAG_FAULT"],
+ )
+
+ @skipUnlessFeature(cpu_feature.AArch64.MTE)
def test_tag_fault(self):
self.build()
exe = self.getBuildArtifact(exe_name)
diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
index f06e7ce..bb7f3be 100644
--- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -32,8 +32,11 @@ bool IsAnnotationOK(const GlobalVariable &GV);
/// profile information and provides methods to operate on them.
class StaticDataProfileInfo {
public:
- /// Accummulate the profile count of a constant that will be lowered to static
- /// data sections.
+ /// A constant is tracked only if the following conditions are met.
+ /// 1) It has local (i.e., private or internal) linkage.
+ /// 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}.
+ /// 3) It's eligible for section prefix annotation. See `AnnotationKind`
+ /// above for ineligible reasons.
DenseMap<const Constant *, uint64_t> ConstantProfileCounts;
/// Keeps track of the constants that are seen at least once without profile
@@ -44,6 +47,22 @@ public:
LLVM_ABI std::optional<uint64_t>
getConstantProfileCount(const Constant *C) const;
+ /// Use a signed enum so values can be compared, and make 'LukewarmOrUnknown'
+ /// 0 so that any accidentally uninitialized value defaults to unknown.
+ enum class StaticDataHotness : int8_t {
+ Cold = -1,
+ LukewarmOrUnknown = 0,
+ Hot = 1,
+ };
+
+ /// Return the hotness of the constant \p C based on its profile count \p
+ /// Count.
+ LLVM_ABI StaticDataHotness getConstantHotnessUsingProfileCount(
+ const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const;
+
+ /// Return the string representation of the hotness enum \p Hotness.
+ LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const;
+
public:
StaticDataProfileInfo() = default;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 77ce052..2c59a52 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -43,8 +43,14 @@ public:
///
/// \param LDCS Flag to indicate whether we should load the call site
/// information from DWARF `DW_TAG_call_site` entries
- DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
- : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
+ ///
+ /// \param MachO Flag to indicate if the object file is mach-o (Apple's
+ /// executable format). Apple has some compile unit attributes that look like
+ /// split DWARF, but they aren't and they can cause warnings to be emitted
+ /// about missing DWO files.
+ DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false,
+ bool MachO = false)
+ : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS), IsMachO(MachO) {}
/// Extract the DWARF from the supplied object file and convert it into the
/// Gsym format in the GsymCreator object that is passed in. Returns an
@@ -97,6 +103,7 @@ private:
DWARFContext &DICtx;
GsymCreator &Gsym;
bool LoadDwarfCallSites;
+ bool IsMachO;
friend class DwarfTransformerTest;
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h
index 596cc18..b0197f0 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h
@@ -13,7 +13,6 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H
#define LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H
-#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include "llvm/Support/Compiler.h"
@@ -54,9 +53,6 @@ inline size_t numDeallocActions(const AllocActions &AAs) {
AAs, [](const AllocActionCallPair &P) { return !!P.Dealloc; });
}
-using OnRunFinalizeActionsCompleteFn =
- unique_function<void(Expected<std::vector<WrapperFunctionCall>>)>;
-
/// Run finalize actions.
///
/// If any finalize action fails then the corresponding dealloc actions will be
@@ -67,16 +63,13 @@ using OnRunFinalizeActionsCompleteFn =
/// be returned. The dealloc actions should be run by calling
/// runDeallocationActions. If this function succeeds then the AA argument will
/// be cleared before the function returns.
-LLVM_ABI void runFinalizeActions(AllocActions &AAs,
- OnRunFinalizeActionsCompleteFn OnComplete);
-
-using OnRunDeallocActionsComeleteFn = unique_function<void(Error)>;
+LLVM_ABI Expected<std::vector<WrapperFunctionCall>>
+runFinalizeActions(AllocActions &AAs);
/// Run deallocation actions.
/// Dealloc actions will be run in reverse order (from last element of DAs to
/// first).
-LLVM_ABI void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs,
- OnRunDeallocActionsComeleteFn OnComplete);
+LLVM_ABI Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs);
using SPSAllocActionCallPair =
SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>;
diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h
index 5b823db..ce91f62 100644
--- a/llvm/include/llvm/TableGen/CodeGenHelpers.h
+++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h
@@ -34,6 +34,21 @@ private:
raw_ostream &OS;
};
+// Simple RAII helper for emitting header include guard (ifndef-define-endif).
+class IncludeGuardEmitter {
+public:
+ IncludeGuardEmitter(raw_ostream &OS, StringRef Name)
+ : Name(Name.str()), OS(OS) {
+ OS << "#ifndef " << Name << "\n"
+ << "#define " << Name << "\n\n";
+ }
+ ~IncludeGuardEmitter() { OS << "\n#endif // " << Name << "\n"; }
+
+private:
+ std::string Name;
+ raw_ostream &OS;
+};
+
// Simple RAII helper for emitting namespace scope. Name can be a single
// namespace (empty for anonymous namespace) or nested namespace.
class NamespaceEmitter {
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4bafd3f..a64b93d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6417,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S,
case scSequentialUMinExpr:
return GetGCDMultiple(cast<SCEVNAryExpr>(S));
case scUnknown: {
- // ask ValueTracking for known bits
+ // Ask ValueTracking for known bits. SCEVUnknowns only become available at
+ // the point their underlying IR instruction has been defined. If CtxI was
+ // not provided, use:
+ // * the first instruction in the entry block if it is an argument
+ // * the instruction itself otherwise.
const SCEVUnknown *U = cast<SCEVUnknown>(S);
+ if (!CtxI) {
+ if (isa<Argument>(U->getValue()))
+ CtxI = &*F.getEntryBlock().begin();
+ else if (auto *I = dyn_cast<Instruction>(U->getValue()))
+ CtxI = I;
+ }
unsigned Known =
computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT)
.countMinTrailingZeros();
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index 1f751ee..e7f0b2c 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount(
OriginalCount = getInstrMaxCountValue();
}
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
+ const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const {
+ // The accumulated counter shows the constant is hot. Return enum 'hot'
+ // whether this variable is seen by unprofiled functions or not.
+ if (PSI->isHotCount(Count))
+ return StaticDataHotness::Hot;
+ // The constant is not hot, and seen by unprofiled functions. We don't want to
+ // assign it to unlikely sections, even if the counter says 'cold'. So return
+ // enum 'LukewarmOrUnknown'.
+ if (ConstantWithoutCounts.count(C))
+ return StaticDataHotness::LukewarmOrUnknown;
+ // The accumulated counter shows the constant is cold so return enum 'cold'.
+ if (PSI->isColdCount(Count))
+ return StaticDataHotness::Cold;
+
+ return StaticDataHotness::LukewarmOrUnknown;
+}
+
+StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
+ switch (Hotness) {
+ case StaticDataHotness::Cold:
+ return "unlikely";
+ case StaticDataHotness::Hot:
+ return "hot";
+ default:
+ return "";
+ }
+}
+
std::optional<uint64_t>
StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
auto I = ConstantProfileCounts.find(C);
@@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
StringRef StaticDataProfileInfo::getConstantSectionPrefix(
const Constant *C, const ProfileSummaryInfo *PSI) const {
- auto Count = getConstantProfileCount(C);
+ std::optional<uint64_t> Count = getConstantProfileCount(C);
if (!Count)
return "";
- // The accummulated counter shows the constant is hot. Return 'hot' whether
- // this variable is seen by unprofiled functions or not.
- if (PSI->isHotCount(*Count))
- return "hot";
- // The constant is not hot, and seen by unprofiled functions. We don't want to
- // assign it to unlikely sections, even if the counter says 'cold'. So return
- // an empty prefix before checking whether the counter is cold.
- if (ConstantWithoutCounts.count(C))
- return "";
- // The accummulated counter shows the constant is cold. Return 'unlikely'.
- if (PSI->isColdCount(*Count))
- return "unlikely";
- // The counter says lukewarm. Return an empty prefix.
- return "";
+ return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
}
bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 0e38017..d2f2c3e 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
SmallVectorImpl<SlotIndex> *EndPoints) {
LiveQueryResult LRQ = LR.Query(Kill);
- VNInfo *VNI = LRQ.valueOutOrDead();
+ // LR may have liveness reachable from an early-clobber slot, in which case
+ // the value may be only live-in to the instruction rather than live-out.
+ // For example, if LR = [1r, 3r) and Kill = 3e, we must prune [3e, 3r) of LR.
+ VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn();
if (!VNI)
return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 787a81a..358e060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -658,13 +658,13 @@ namespace {
bool InexpensiveOnly = false,
std::optional<EVT> OutVT = std::nullopt);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
- SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+ SDValue buildRsqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -18590,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -18635,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
SDValue AAZ =
DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
// Estimate creation failed. Clean up speculatively created nodes.
@@ -18645,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// We found a FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
- if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
@@ -18742,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
- return buildSqrtEstimate(N0, Flags);
+ return buildSqrtEstimate(N0);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -29743,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
}
// If non-reciprocal square root is requested, multiply the result by Arg.
if (!Reciprocal)
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
return Est;
}
@@ -29775,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -29789,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations for reciprocal square root:
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -29799,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
SDValue LHS;
if (Reciprocal || (i + 1) < Iterations) {
// RSQRT: LHS = (E * -0.5)
- LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
} else {
// SQRT: LHS = (A * E) * -0.5
- LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
}
- Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
}
return Est;
@@ -29814,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
- bool Reciprocal) {
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
if (LegalDAG)
return SDValue();
@@ -29843,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (Iterations > 0)
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target specific test first.
@@ -29862,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
return SDValue();
}
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, true);
}
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, false);
}
/// Return true if there is any possibility that the two addresses overlap.
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7a0256f..fa39603 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
if (FilePath.empty()) {
// If we had a DW_AT_decl_file, but got no file then we need to emit a
// warning.
+ const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+ Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+ // Check if there is no DW_AT_decl_file attribute, and don't report an
+ // error if it isn't there.
+ if (DwarfFileIdx == UINT32_MAX)
+ return;
Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
- const uint64_t DwarfFileIdx = dwarf::toUnsigned(
- Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
OS << "error: function DIE at " << HEX32(Die.getOffset())
<< " has an invalid file index " << DwarfFileIdx
<< " in its DW_AT_decl_file attribute, unable to create a single "
@@ -629,6 +633,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
size_t NumBefore = Gsym.getNumFunctionInfos();
auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
+ // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF.
+ if (IsMachO)
+ return ReturnDie;
+
if (DwarfUnit.getDWOId()) {
DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
if (!DWOCU->isDWOUnit())
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 5b3c05e..6c7e27e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -260,22 +260,17 @@ public:
}
// Run finalization actions.
- using WrapperFunctionCall = orc::shared::WrapperFunctionCall;
- runFinalizeActions(
- G->allocActions(),
- [this, OnFinalized = std::move(OnFinalized)](
- Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable {
- completeFinalization(std::move(OnFinalized),
- std::move(DeallocActions));
- });
- }
+ auto DeallocActions = runFinalizeActions(G->allocActions());
+ if (!DeallocActions) {
+ OnFinalized(DeallocActions.takeError());
+ return;
+ }
- void abandon(OnAbandonedFunction OnAbandoned) override {
- Error Err = Error::success();
- if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
- if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ // Release the finalize segments slab.
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
+ OnFinalized(errorCodeToError(EC));
+ return;
+ }
#ifndef NDEBUG
// Set 'G' to null to flag that we've been successfully finalized.
@@ -284,22 +279,17 @@ public:
G = nullptr;
#endif
- OnAbandoned(std::move(Err));
+ // Continue with finalized allocation.
+ OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
+ std::move(*DeallocActions)));
}
-private:
- void completeFinalization(
- OnFinalizedFunction OnFinalized,
- Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) {
-
- if (!DeallocActions)
- return OnFinalized(DeallocActions.takeError());
-
- // Release the finalize segments slab.
- if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
- OnFinalized(errorCodeToError(EC));
- return;
- }
+ void abandon(OnAbandonedFunction OnAbandoned) override {
+ Error Err = Error::success();
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
#ifndef NDEBUG
// Set 'G' to null to flag that we've been successfully finalized.
@@ -308,11 +298,10 @@ private:
G = nullptr;
#endif
- // Continue with finalized allocation.
- OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
- std::move(*DeallocActions)));
+ OnAbandoned(std::move(Err));
}
+private:
Error applyProtections() {
for (auto &KV : BL.segments()) {
const auto &AG = KV.first;
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 7b327af..7e606c6a 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size);
}
- std::vector<shared::WrapperFunctionCall> DeinitializeActions;
- {
- std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
- auto F = P.get_future();
- shared::runFinalizeActions(
- AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
- P.set_value(std::move(R));
- });
- if (auto DeinitializeActionsOrErr = F.get())
- DeinitializeActions = std::move(*DeinitializeActionsOrErr);
- else
- return OnInitialized(DeinitializeActionsOrErr.takeError());
- }
+ auto DeinitializeActions = shared::runFinalizeActions(AI.Actions);
+ if (!DeinitializeActions)
+ return OnInitialized(DeinitializeActions.takeError());
{
std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
// This is the maximum range whose permission have been possibly modified
auto &Alloc = Allocations[MinAddr];
Alloc.Size = MaxAddr - MinAddr;
- Alloc.DeinitializationActions = std::move(DeinitializeActions);
+ Alloc.DeinitializationActions = std::move(*DeinitializeActions);
Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr);
}
@@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize(
for (auto Base : llvm::reverse(Bases)) {
- shared::runDeallocActions(
- Allocations[Base].DeinitializationActions, [&](Error Err) {
- AllErr = joinErrors(std::move(AllErr), std::move(Err));
- });
+ if (Error Err = shared::runDeallocActions(
+ Allocations[Base].DeinitializationActions)) {
+ AllErr = joinErrors(std::move(AllErr), std::move(Err));
+ }
// Reset protections to read/write so the area can be reused
if (auto EC = sys::Memory::protectMappedMemory(
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
index 08ab0c6..91f2899 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
@@ -12,39 +12,31 @@ namespace llvm {
namespace orc {
namespace shared {
-void runFinalizeActions(AllocActions &AAs,
- OnRunFinalizeActionsCompleteFn OnComplete) {
+Expected<std::vector<WrapperFunctionCall>>
+runFinalizeActions(AllocActions &AAs) {
std::vector<WrapperFunctionCall> DeallocActions;
DeallocActions.reserve(numDeallocActions(AAs));
for (auto &AA : AAs) {
if (AA.Finalize)
-
- if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) {
- while (!DeallocActions.empty()) {
- Err = joinErrors(std::move(Err),
- DeallocActions.back().runWithSPSRetErrorMerged());
- DeallocActions.pop_back();
- }
- return OnComplete(std::move(Err));
- }
+ if (auto Err = AA.Finalize.runWithSPSRetErrorMerged())
+ return joinErrors(std::move(Err), runDeallocActions(DeallocActions));
if (AA.Dealloc)
DeallocActions.push_back(std::move(AA.Dealloc));
}
AAs.clear();
- OnComplete(std::move(DeallocActions));
+ return DeallocActions;
}
-void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs,
- OnRunDeallocActionsComeleteFn OnComplete) {
+Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) {
Error Err = Error::success();
while (!DAs.empty()) {
Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged());
DAs = DAs.drop_back();
}
- OnComplete(std::move(Err));
+ return Err;
}
} // namespace shared
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
index 8c24b1f..4fbf232 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -9,10 +9,8 @@
#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/WindowsError.h"
-#include <future>
#include <sstream>
#if defined(LLVM_ON_UNIX)
@@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
}
// Run finalization actions and get deinitlization action list.
- std::vector<shared::WrapperFunctionCall> DeinitializeActions;
- {
- std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
- auto F = P.get_future();
- shared::runFinalizeActions(
- FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
- P.set_value(std::move(R));
- });
- if (auto DeinitializeActionsOrErr = F.get())
- DeinitializeActions = std::move(*DeinitializeActionsOrErr);
- else
- return DeinitializeActionsOrErr.takeError();
+ auto DeinitializeActions = shared::runFinalizeActions(FR.Actions);
+ if (!DeinitializeActions) {
+ return DeinitializeActions.takeError();
}
{
std::lock_guard<std::mutex> Lock(Mutex);
Allocations[MinAddr].DeinitializationActions =
- std::move(DeinitializeActions);
+ std::move(*DeinitializeActions);
Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr);
}
@@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize(
std::lock_guard<std::mutex> Lock(Mutex);
for (auto Base : llvm::reverse(Bases)) {
- shared::runDeallocActions(
- Allocations[Base].DeinitializationActions, [&](Error Err) {
- if (Err)
- AllErr = joinErrors(std::move(AllErr), std::move(Err));
- });
+ if (Error Err = shared::runDeallocActions(
+ Allocations[Base].DeinitializationActions)) {
+ AllErr = joinErrors(std::move(AllErr), std::move(Err));
+ }
// Remove the allocation from the allocation list of its reservation
for (auto &Reservation : Reservations) {
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index ca81d30..8ace2d2 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cstdint>
#include <optional>
using namespace llvm;
@@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
dxbc::PSV::v2::ResourceBindInfo BindInfo;
BindInfo.Type = Type;
BindInfo.LowerBound = Binding.LowerBound;
- BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1;
+ assert(Binding.Size == UINT32_MAX ||
+ (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX &&
+ "Resource range is too large");
+ BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
+ ? UINT32_MAX
+ : Binding.LowerBound + Binding.Size - 1;
BindInfo.Space = Binding.Space;
BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind);
BindInfo.Flags = Flags;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc..f973949 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
MachineOperand Src = MI.getOperand(3);
MachineOperand Len = MI.getOperand(4);
+ // If the length is a constant, we don't actually need the check.
+ if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+ if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+ Def->getOpcode() == WebAssembly::CONST_I64) {
+ if (Def->getOperand(1).getImm() == 0) {
+ // A zero-length memcpy is a no-op.
+ MI.eraseFromParent();
+ return BB;
+ }
+ // A non-zero-length memcpy doesn't need a zero check.
+ unsigned MemoryCopy =
+ Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
+ BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
+ .add(DstMem)
+ .add(SrcMem)
+ .add(Dst)
+ .add(Src)
+ .add(Len);
+ MI.eraseFromParent();
+ return BB;
+ }
+ }
+
// We're going to add an extra use to `Len` to test if it's zero; that
// use shouldn't be a kill, even if the original use is.
MachineOperand NoKillLen = Len;
@@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
MachineOperand Val = MI.getOperand(2);
MachineOperand Len = MI.getOperand(3);
+ // If the length is a constant, we don't actually need the check.
+ if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+ if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+ Def->getOpcode() == WebAssembly::CONST_I64) {
+ if (Def->getOperand(1).getImm() == 0) {
+ // A zero-length memset is a no-op.
+ MI.eraseFromParent();
+ return BB;
+ }
+ // A non-zero-length memset doesn't need a zero check.
+ unsigned MemoryFill =
+ Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
+ BuildMI(*BB, MI, DL, TII.get(MemoryFill))
+ .add(Mem)
+ .add(Dst)
+ .add(Val)
+ .add(Len);
+ MI.eraseFromParent();
+ return BB;
+ }
+ }
+
// We're going to add an extra use to `Len` to test if it's zero; that
// use shouldn't be a kill, even if the original use is.
MachineOperand NoKillLen = Len;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index d2f09e9..578fec7 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2667,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
- V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
+ // No profiling support for vector selects.
+ V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
+ DEBUG_TYPE, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll
new file mode 100644
index 0000000..ebab9f0
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s --data-layout="e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X64 %s
+; RUN: opt < %s --data-layout="e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X32 %s
+
+declare void @useptr(ptr)
+
+define void @ptrtoaddr(ptr %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) {
+; X64-LABEL: 'ptrtoaddr'
+; X64-NEXT: Classifying expressions for: @ptrtoaddr
+; X64-NEXT: %p0 = ptrtoaddr ptr %in to i64
+; X64-NEXT: --> %p0 U: full-set S: full-set
+; X64-NEXT: %p1 = ptrtoaddr ptr %in to i32
+; X64-NEXT: --> %p1 U: full-set S: full-set
+; X64-NEXT: %p2 = ptrtoaddr ptr %in to i16
+; X64-NEXT: --> %p2 U: full-set S: full-set
+; X64-NEXT: %p3 = ptrtoaddr ptr %in to i128
+; X64-NEXT: --> %p3 U: full-set S: full-set
+; X64-NEXT: Determining loop execution counts for: @ptrtoaddr
+;
+; X32-LABEL: 'ptrtoaddr'
+; X32-NEXT: Classifying expressions for: @ptrtoaddr
+; X32-NEXT: %p0 = ptrtoaddr ptr %in to i64
+; X32-NEXT: --> %p0 U: full-set S: full-set
+; X32-NEXT: %p1 = ptrtoaddr ptr %in to i32
+; X32-NEXT: --> %p1 U: full-set S: full-set
+; X32-NEXT: %p2 = ptrtoaddr ptr %in to i16
+; X32-NEXT: --> %p2 U: full-set S: full-set
+; X32-NEXT: %p3 = ptrtoaddr ptr %in to i128
+; X32-NEXT: --> %p3 U: full-set S: full-set
+; X32-NEXT: Determining loop execution counts for: @ptrtoaddr
+;
+ %p0 = ptrtoaddr ptr %in to i64
+ %p1 = ptrtoaddr ptr %in to i32
+ %p2 = ptrtoaddr ptr %in to i16
+ %p3 = ptrtoaddr ptr %in to i128
+ store i64 %p0, ptr %out0
+ store i32 %p1, ptr %out1
+ store i16 %p2, ptr %out2
+ store i128 %p3, ptr %out3
+ ret void
+}
+
+define void @ptrtoaddr_as1(ptr addrspace(1) %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) {
+; X64-LABEL: 'ptrtoaddr_as1'
+; X64-NEXT: Classifying expressions for: @ptrtoaddr_as1
+; X64-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64
+; X64-NEXT: --> %p0 U: full-set S: full-set
+; X64-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32
+; X64-NEXT: --> %p1 U: full-set S: full-set
+; X64-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16
+; X64-NEXT: --> %p2 U: full-set S: full-set
+; X64-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128
+; X64-NEXT: --> %p3 U: full-set S: full-set
+; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_as1
+;
+; X32-LABEL: 'ptrtoaddr_as1'
+; X32-NEXT: Classifying expressions for: @ptrtoaddr_as1
+; X32-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64
+; X32-NEXT: --> %p0 U: full-set S: full-set
+; X32-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32
+; X32-NEXT: --> %p1 U: full-set S: full-set
+; X32-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16
+; X32-NEXT: --> %p2 U: full-set S: full-set
+; X32-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128
+; X32-NEXT: --> %p3 U: full-set S: full-set
+; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_as1
+;
+ %p0 = ptrtoaddr ptr addrspace(1) %in to i64
+ %p1 = ptrtoaddr ptr addrspace(1) %in to i32
+ %p2 = ptrtoaddr ptr addrspace(1) %in to i16
+ %p3 = ptrtoaddr ptr addrspace(1) %in to i128
+ store i64 %p0, ptr %out0
+ store i32 %p1, ptr %out1
+ store i16 %p2, ptr %out2
+ store i128 %p3, ptr %out3
+ ret void
+}
+
+define void @ptrtoaddr_of_bitcast(ptr %in, ptr %out0) {
+; X64-LABEL: 'ptrtoaddr_of_bitcast'
+; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast
+; X64-NEXT: %in_casted = bitcast ptr %in to ptr
+; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64
+; X64-NEXT: --> %p0 U: full-set S: full-set
+; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast
+;
+; X32-LABEL: 'ptrtoaddr_of_bitcast'
+; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast
+; X32-NEXT: %in_casted = bitcast ptr %in to ptr
+; X32-NEXT: --> %in U: full-set S: full-set
+; X32-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64
+; X32-NEXT: --> %p0 U: full-set S: full-set
+; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast
+;
+ %in_casted = bitcast ptr %in to ptr
+ %p0 = ptrtoaddr ptr %in_casted to i64
+ store i64 %p0, ptr %out0
+ ret void
+}
+
+define void @ptrtoaddr_of_nullptr(ptr %out0) {
+; ALL-LABEL: 'ptrtoaddr_of_nullptr'
+; ALL-NEXT: Classifying expressions for: @ptrtoaddr_of_nullptr
+; ALL-NEXT: %p0 = ptrtoaddr ptr null to i64
+; ALL-NEXT: --> %p0 U: full-set S: full-set
+; ALL-NEXT: Determining loop execution counts for: @ptrtoaddr_of_nullptr
+;
+ %p0 = ptrtoaddr ptr null to i64
+ store i64 %p0, ptr %out0
+ ret void
+}
+
+define void @ptrtoaddr_of_gep(ptr %in, ptr %out0) {
+; X64-LABEL: 'ptrtoaddr_of_gep'
+; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_gep
+; X64-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42
+; X64-NEXT: --> (42 + %in) U: full-set S: full-set
+; X64-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64
+; X64-NEXT: --> %p0 U: full-set S: full-set
+; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep
+;
+; X32-LABEL: 'ptrtoaddr_of_gep'
+; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_gep
+; X32-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42
+; X32-NEXT: --> (42 + %in) U: full-set S: full-set
+; X32-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64
+; X32-NEXT: --> %p0 U: full-set S: full-set
+; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep
+;
+ %in_adj = getelementptr inbounds i8, ptr %in, i64 42
+ %p0 = ptrtoaddr ptr %in_adj to i64
+ store i64 %p0, ptr %out0
+ ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
index 7ba422d..a477465c 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
@@ -578,22 +578,22 @@ define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ]
@@ -615,9 +615,9 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903
@@ -644,9 +644,9 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(4 + %start)<nuw><nsw>,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903
@@ -677,22 +677,22 @@ define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr
; CHECK-NEXT: %c = call i1 @cond()
; CHECK-NEXT: --> %c U: full-set S: full-set
; CHECK-NEXT: %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ]
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
index bea0310..70224fc 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
@@ -94,6 +94,18 @@ define void @main() #0 {
%uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0(
i32 4, i32 0, i32 10, i32 5, ptr null)
+
+ ; RWBuffer<float4> UnboundedArray[] : register(u10, space5)
+; CHECK: - Type: UAVTyped
+; CHECK: Space: 5
+; CHECK: LowerBound: 10
+; CHECK: UpperBound: 4294967295
+; CHECK: Kind: TypedBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ ; RWBuffer<float4> Buf = UnboundedArray[100];
+ %uav3 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 5, i32 10, i32 -1, i32 100, ptr null)
ret void
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
index bf31ccb..559cc53 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -32,6 +32,40 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
ret void
}
+define void @select_v32i8_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v32i8_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %sel = select <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i8> %v0, <32 x i8> %v1
+ store <32 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v32i8_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v32i8_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %sel = select <32 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1
+ store <32 x i8> %sel, ptr %res
+ ret void
+}
+
define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v16i16:
; CHECK: # %bb.0:
@@ -49,6 +83,40 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
ret void
}
+define void @select_v16i16_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i16_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %v0, <16 x i16> %v1
+ store <16 x i16> %sel, ptr %res
+ ret void
+}
+
+define void @select_v16i16_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i16_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i16> %v0, <16 x i16> %v1
+ store <16 x i16> %sel, ptr %res
+ ret void
+}
+
define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v8i32:
; CHECK: # %bb.0:
@@ -65,19 +133,70 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
ret void
}
+define void @select_v8i32_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %sel = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i32> %v0, <8 x i32> %v1
+ store <8 x i32> %sel, ptr %res
+ ret void
+}
+
+define void @select_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = load <8 x float>, ptr %a1
+ %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %v0, <8 x float> %v1
+ store <8 x float> %sel, ptr %res
+ ret void
+}
+
define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI10_0)
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i64>, ptr %a0
%v1 = load <4 x i64>, ptr %a1
- %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1
+ %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v0, <4 x i64> %v1
store <4 x i64> %sel, ptr %res
ret void
}
+
+define void @select_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0)
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v0, <4 x double> %v1
+ store <4 x double> %sel, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
index 8f25a6b..25c4f09 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -16,6 +16,20 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
ret void
}
+define void @select_v16i8_imm_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: select_v16i8_imm_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, -256
+; CHECK-NEXT: vbitseli.b $vr1, $vr0, 1
+; CHECK-NEXT: vst $vr1, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %v0
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v16i8:
; CHECK: # %bb.0:
@@ -32,6 +46,40 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
ret void
}
+define void @select_v16i8_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %v0, <16 x i8> %v1
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v16i8_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %sel = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> %v0, <16 x i8> %v1
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v8i16:
; CHECK: # %bb.0:
@@ -49,6 +97,40 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
ret void
}
+define void @select_v8i16_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1
+ store <8 x i16> %sel, ptr %res
+ ret void
+}
+
+define void @select_v8i16_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1
+ store <8 x i16> %sel, ptr %res
+ ret void
+}
+
define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v4i32:
; CHECK: # %bb.0:
@@ -65,13 +147,47 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
ret void
}
+define void @select_v4i32_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v0, <4 x i32> %v1
+ store <4 x i32> %sel, ptr %res
+ ret void
+}
+
+define void @select_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI10_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = load <4 x float>, ptr %a1
+ %sel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %v0, <4 x float> %v1
+ store <4 x float> %sel, ptr %res
+ ret void
+}
+
define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI11_0)
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
@@ -81,3 +197,20 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
store <2 x i64> %sel, ptr %res
ret void
}
+
+define void @select_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI12_0)
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %sel = select <2 x i1> <i1 false, i1 true>, <2 x double> %v0, <2 x double> %v1
+ store <2 x double> %sel, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index e71f59c..cad684e 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 {
;
; GLOBAL-LABEL: sqrt_afn_ieee:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
-; GLOBAL-NEXT: xsabsdp 0, 1
-; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
-; GLOBAL-NEXT: fcmpu 0, 0, 2
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB11_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsabsdp 1, 1
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB11_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
+; GLOBAL-NEXT: xssubsp 1, 1, 2
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 1, 1, 0, 2
; GLOBAL-NEXT: blr
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB13_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB13_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 2, 1, 2, 0
+; GLOBAL-NEXT: xsnegdp 1, 1
+; GLOBAL-NEXT: fsel 1, 1, 2, 0
; GLOBAL-NEXT: blr
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 {
;
; GLOBAL-LABEL: sqrt_fast_ieee:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
-; GLOBAL-NEXT: xsabsdp 0, 1
-; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
-; GLOBAL-NEXT: fcmpu 0, 0, 2
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB15_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsabsdp 1, 1
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB15_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
+; GLOBAL-NEXT: xssubsp 1, 1, 2
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 1, 1, 0, 2
; GLOBAL-NEXT: blr
%rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
;
; GLOBAL-LABEL: sqrt_fast_preserve_sign:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB16_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB16_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 2, 1, 2, 0
+; GLOBAL-NEXT: xsnegdp 1, 1
+; GLOBAL-NEXT: fsel 1, 1, 2, 0
; GLOBAL-NEXT: blr
%rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
ret float %rt
diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll
new file mode 100644
index 0000000..c19e93d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O1 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+
+define i32 @pr134424(i64 %input_value, i32 %base_value, i1 %cond_flag1, i1 %cond_flag2, i1 %cond_flag3) {
+; CHECK-LABEL: pr134424:
+; CHECK: # %bb.0: # %for.body.us.preheader.i
+; CHECK-NEXT: andi a3, a3, 1
+; CHECK-NEXT: andi a5, a2, 1
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 14
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bnez a5, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %for.body.us.preheader.i
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: .LBB0_2: # %for.body.us.preheader.i
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: andi a4, a4, 1
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: bnez a3, .LBB0_4
+; CHECK-NEXT: # %bb.3: # %for.body.us.preheader.i
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: .LBB0_4: # %for.body.us.preheader.i
+; CHECK-NEXT: vmsle.vi v0, v8, 0
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: bnez a4, .LBB0_6
+; CHECK-NEXT: # %bb.5: # %for.body.us.preheader.i
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: .LBB0_6: # %for.body.us.preheader.i
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vredmin.vs v8, v8, v8
+; CHECK-NEXT: vmv.x.s a3, v8
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: bge a3, a2, .LBB0_11
+; CHECK-NEXT: # %bb.7: # %for.body.us.preheader.i
+; CHECK-NEXT: bge a0, a1, .LBB0_12
+; CHECK-NEXT: .LBB0_8: # %for.body.us.preheader.i
+; CHECK-NEXT: blt a3, a0, .LBB0_10
+; CHECK-NEXT: .LBB0_9: # %for.body.us.preheader.i
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB0_10: # %for.body.us.preheader.i
+; CHECK-NEXT: sw a3, 0(zero)
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_11: # %for.body.us.preheader.i
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: blt a0, a1, .LBB0_8
+; CHECK-NEXT: .LBB0_12: # %for.body.us.preheader.i
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: bge a3, a0, .LBB0_9
+; CHECK-NEXT: j .LBB0_10
+for.body.us.preheader.i:
+ %partial_vector = insertelement <4 x i64> zeroinitializer, i64 %input_value, i64 1
+ %comparison_vector = shufflevector <4 x i64> %partial_vector, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ %comparison_result = icmp sle <4 x i64> %comparison_vector, zeroinitializer
+ %selected_value1 = select i1 %cond_flag1, i32 %base_value, i32 1
+ %selected_value2 = select i1 %cond_flag2, i32 %base_value, i32 1
+ %selected_value3 = select i1 %cond_flag3, i32 %base_value, i32 1
+ %bool_to_int = zext <4 x i1> %comparison_result to <4 x i32>
+ %extended_vector = shufflevector <4 x i32> %bool_to_int, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+ %vector_min = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %extended_vector)
+ %min1 = call i32 @llvm.smin.i32(i32 %vector_min, i32 %selected_value1)
+ %min2 = call i32 @llvm.smin.i32(i32 %selected_value2, i32 %selected_value3)
+ %final_min = call i32 @llvm.smin.i32(i32 %min1, i32 %min2)
+ store i32 %final_min, ptr null, align 4
+ ret i32 0
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir
new file mode 100644
index 0000000..aeab8f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=register-coalescer -o - %s | FileCheck %s
+
+---
+name: pr71023
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: pr71023
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $x10, $v8, $v10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_2:vrn8m1 = PseudoVMV_V_I_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_6:vrn8m1 = COPY undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2
+ ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3
+ ; CHECK-NEXT: PseudoBR %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber [[PseudoVMV_V_I_M1_]].sub_vrm1_0:vrn8m1 = PseudoVRGATHER_VI_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_0, [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoVSSEG6E8_V_M1_MASK [[PseudoVMV_V_I_M1_]].sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5, undef [[DEF]], killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1)
+ ; CHECK-NEXT: PseudoRET
+ bb.0:
+ successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ liveins: $x10, $v8, $v10
+ %0:gpr = IMPLICIT_DEF
+ %1:vrnov0 = PseudoVMV_V_I_M1 undef %1, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ %2:vrnov0 = IMPLICIT_DEF
+ undef %3.sub_vrm1_0:vrn6m1nov0 = COPY undef %1
+ %3.sub_vrm1_3:vrn6m1nov0 = COPY %2
+ %3.sub_vrm1_4:vrn6m1nov0 = COPY undef %1
+ BNE undef %0, $x0, %bb.3
+ PseudoBR %bb.1
+ bb.1:
+ successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ BNE killed undef %0, $x0, %bb.3
+ PseudoBR %bb.2
+ bb.2:
+ successors: %bb.3(0x80000000)
+ bb.3:
+ %4:vr = IMPLICIT_DEF
+ early-clobber %4:vr = PseudoVRGATHER_VI_M1 undef %4, killed %1, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ undef %5.sub_vrm1_0:vrn6m1 = COPY killed %4
+ %5.sub_vrm1_5:vrn6m1 = COPY killed %2
+ PseudoVSSEG6E8_V_M1_MASK killed %5, undef %0, killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1)
+ PseudoRET
+...
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
index ae170d7..d949068 100644
--- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
@@ -104,6 +104,31 @@ define void @memset_i32(ptr %dest, i8 %val, i32 %len) {
ret void
}
+; CHECK-LABEL: memcpy_0:
+; CHECK-NEXT: .functype memcpy_0 (i32, i32) -> ()
+; CHECK-NEXT: return
+define void @memcpy_0(ptr %dest, ptr %src) {
+ call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0)
+ ret void
+}
+
+; CHECK-LABEL: memmove_0:
+; CHECK-NEXT: .functype memmove_0 (i32, i32) -> ()
+; CHECK-NEXT: return
+define void @memmove_0(ptr %dest, ptr %src) {
+ call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0)
+ ret void
+}
+
+; CHECK-LABEL: memset_0:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_0 (i32, i32) -> ()
+; BULK-MEM-NEXT: return
+define void @memset_0(ptr %dest, i8 %val) {
+ call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 0, i1 0)
+ ret void
+}
+
; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
@@ -137,14 +162,8 @@ define void @memset_1(ptr %dest, i8 %val) {
; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
-; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(ptr %dest, ptr %src) {
call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0)
@@ -154,14 +173,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) {
; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
-; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(ptr %dest, ptr %src) {
call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0)
@@ -171,14 +184,8 @@ define void @memmove_1024(ptr %dest, ptr %src) {
; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
-; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L2]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(ptr %dest, i8 %val) {
call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1024, i1 0)
@@ -201,17 +208,11 @@ define void @memset_1024(ptr %dest, i8 %val) {
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
-; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(ptr %dst) {
%a = alloca [100 x i8]
@@ -224,17 +225,11 @@ define void @memcpy_alloca_src(ptr %dst) {
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
-; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(ptr %src) {
%a = alloca [100 x i8]
@@ -247,17 +242,11 @@ define void @memcpy_alloca_dst(ptr %src) {
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i32.sub $1=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i32.eqz $push[[L3:[0-9]+]]=, $pop[[L2]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L3]]
-; BULK-MEM-NEXT: i32.const $push[[L4:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i32.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]]
-; BULK-MEM-NEXT: i32.const $push[[L6:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
%a = alloca [100 x i8]
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
index 0cf8493..d0206a3 100644
--- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
@@ -110,6 +110,31 @@ define void @memset_i32(ptr %dest, i8 %val, i64 %len) {
ret void
}
+; CHECK-LABEL: memcpy_0:
+; CHECK-NEXT: .functype memcpy_0 (i64, i64) -> ()
+; CHECK-NEXT: return
+define void @memcpy_0(ptr %dest, ptr %src) {
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0)
+ ret void
+}
+
+; CHECK-LABEL: memmove_0:
+; CHECK-NEXT: .functype memmove_0 (i64, i64) -> ()
+; CHECK-NEXT: return
+define void @memmove_0(ptr %dest, ptr %src) {
+ call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0)
+ ret void
+}
+
+; CHECK-LABEL: memset_0:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_0 (i64, i32) -> ()
+; BULK-MEM-NEXT: return
+define void @memset_0(ptr %dest, i8 %val) {
+ call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 0, i1 0)
+ ret void
+}
+
; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
@@ -143,14 +168,8 @@ define void @memset_1(ptr %dest, i8 %val) {
; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
-; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_1024(ptr %dest, ptr %src) {
call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0)
@@ -160,14 +179,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) {
; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
-; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_1024(ptr %dest, ptr %src) {
call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0)
@@ -177,14 +190,8 @@ define void @memmove_1024(ptr %dest, ptr %src) {
; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
-; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_1024(ptr %dest, i8 %val) {
call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1024, i1 0)
@@ -207,17 +214,11 @@ define void @memset_1024(ptr %dest, i8 %val) {
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
-; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(ptr %dst) {
%a = alloca [100 x i8]
@@ -230,17 +231,11 @@ define void @memcpy_alloca_src(ptr %dst) {
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
-; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(ptr %src) {
%a = alloca [100 x i8]
@@ -253,17 +248,11 @@ define void @memcpy_alloca_dst(ptr %src) {
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i64.sub $1=, $pop[[L1]], $pop[[L0]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const $push[[L2:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i64.eqz $push[[L3:[0-9]+]]=, $pop[[L2]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L3]]
-; BULK-MEM-NEXT: i64.const $push[[L4:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i64.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]]
-; BULK-MEM-NEXT: i64.const $push[[L6:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
%a = alloca [100 x i8]
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index cdf6bdd..caec02e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4765,8 +4765,8 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32
}
declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>)
-define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
-; X64-LABEL: pr163023:
+define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023_sext:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
@@ -4774,7 +4774,7 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
; X64-NEXT: vmovdqa64 %zmm1, %zmm0
; X64-NEXT: retq
;
-; X86-LABEL: pr163023:
+; X86-LABEL: pr163023_sext:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kxnorw %k0, %k0, %k1
@@ -4788,7 +4788,40 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
%ofs = sext <16 x i32> %a1 to <16 x i64>
%addr = add nuw <16 x i64> %addr.splat, %ofs
%ptr = inttoptr <16 x i64> %addr to <16 x ptr>
- %gather = tail call fastcc <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+ %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+ ret <16 x i32> %gather
+}
+
+define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023_zext:
+; X64: # %bb.0:
+; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-NEXT: kxnorw %k0, %k0, %k1
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; X64-NEXT: kxnorw %k0, %k0, %k2
+; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2}
+; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1}
+; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: pr163023_zext:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kxnorw %k0, %k0, %k1
+; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+ %addr.p = ptrtoint ptr %a0 to i64
+ %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0
+ %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer
+ %ofs = zext <16 x i32> %a1 to <16 x i64>
+ %addr = add nuw <16 x i64> %addr.splat, %ofs
+ %ptr = inttoptr <16 x i64> %addr to <16 x ptr>
+ %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
ret <16 x i32> %gather
}
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 42617c1..18588aa 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+ ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]]
; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr
@@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+ ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]]
; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr
diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td
index 3eda077..475faf9 100644
--- a/llvm/test/TableGen/directive1.td
+++ b/llvm/test/TableGen/directive1.td
@@ -177,6 +177,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-NEXT: static constexpr bool is_iterable = true;
// CHECK-NEXT: };
// CHECK-NEXT: } // namespace llvm
+// CHECK-EMPTY:
// CHECK-NEXT: #endif // LLVM_Tdl_INC
diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td
index a25197c..ccc0944 100644
--- a/llvm/test/TableGen/directive2.td
+++ b/llvm/test/TableGen/directive2.td
@@ -150,6 +150,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-NEXT: static constexpr bool is_iterable = true;
// CHECK-NEXT: };
// CHECK-NEXT: } // namespace llvm
+// CHECK-EMPTY:
// CHECK-NEXT: #endif // LLVM_Tdl_INC
// IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
new file mode 100644
index 0000000..e4322cf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
@@ -0,0 +1,588 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+define i32 @umax_phi_used_outside(ptr %src, i32 %n) {
+; CHECK-LABEL: define i32 @umax_phi_used_outside(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
+; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT: [[SPEC_SELECT]] = tail call i32 @llvm.umax.i32(i32 [[MAX]], i32 [[L_EXT]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_LCSSA:%.*]] = phi i32 [ [[MAX]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[MAX_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %spec.select, %loop ]
+ %gep.src = getelementptr inbounds i8, ptr %src, i32 %iv
+ %l = load i8, ptr %gep.src
+ %l.ext = zext i8 %l to i32
+ %spec.select = tail call i32 @llvm.umax.i32(i32 %max, i32 %l.ext)
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %max
+}
+
+define i32 @chained_smax(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_smax(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]])
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
+; CHECK: [[PRED_LOAD_IF3]]:
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
+; CHECK: [[PRED_LOAD_CONTINUE4]]:
+; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
+; CHECK: [[PRED_LOAD_IF5]]:
+; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
+; CHECK: [[PRED_LOAD_CONTINUE6]]:
+; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]])
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[TMP28]]
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+ %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max)
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %l, i32 %max.1)
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 1
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %max.next
+}
+
+define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_invariant_store_user(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+ store i32 %max.next, ptr %dst, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+ store i32 %max.next, ptr %dst, align 4
+ %iv.next = add i64 %iv, 1
+ store i32 %max.next, ptr %dst, align 4
+ %ec = icmp eq i64 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr2(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr2(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 0, ptr [[DST]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+ store i32 %max.next, ptr %dst, align 4
+ %iv.next = add i64 %iv, 1
+ store i32 0, ptr %dst, align 4
+ %ec = icmp eq i64 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr3(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 0, ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+ store i32 0, ptr %dst, align 4
+ %iv.next = add i64 %iv, 1
+ store i32 %max.next, ptr %dst, align 4
+ %ec = icmp eq i64 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_different_addr(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_different_addr(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[DST_2:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST_2]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+ store i32 %max.next, ptr %dst, align 4
+ %iv.next = add i64 %iv, 1
+ store i32 %max.next, ptr %dst.2, align 4
+ %ec = icmp eq i64 %iv, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define i32 @chained_instructions_feeding_max1(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_instructions_feeding_max1(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MAX]], [[L]]
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %add = add i32 %max, %l
+ %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 %l)
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 1
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %max.next
+}
+
+define i32 @chained_instructions_feeding_max2(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_instructions_feeding_max2(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[MAX_1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[MAX]])
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[L]], [[MAX_1]]
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 100)
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+ %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+ %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max)
+ %l = load i32, ptr %gep.src, align 4
+ %add = add i32 %l, %max.1
+ %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 100)
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 1
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %max.next
+}
+
+
+define i32 @test_predicated_smin(ptr %src) {
+; CHECK-LABEL: define i32 @test_predicated_smin(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-NEXT: [[TMP3:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP3]])
+; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[PREDPHI]])
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %min = phi i32 [ 0, %entry ], [ %min.merge, %loop.latch ]
+ %gep.src = getelementptr float, ptr %src, i64 %iv
+ %l = load float, ptr %gep.src, align 4
+ %c = fcmp une float %l, 0.0
+ br i1 %c, label %then, label %loop.latch
+
+then:
+ %div = fdiv float %l, 3.0
+ %div.i32 = fptosi float %div to i32
+ %min.next = tail call i32 @llvm.smin.i32(i32 %min, i32 %div.i32)
+ br label %loop.latch
+
+loop.latch:
+ %min.merge = phi i32 [ %min.next, %then ], [ %min, %loop.header ]
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 111
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret i32 %min.merge
+}
+
+define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) {
+; CHECK-LABEL: define i32 @smax_reduction_multiple_incoming(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_HEADER_PREHEADER:.*]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]]
+; CHECK: [[LOOP_HEADER_PREHEADER]]:
+; CHECK-NEXT: [[IV_PH:%.*]] = phi i32 [ 10, %[[ELSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[MAX_PH:%.*]] = phi i32 [ 5, %[[ELSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[IV_PH]], [[N_VEC]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MAX_PH]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IV_PH]], [[INDEX]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[MAX_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+ br i1 %cond, label %loop.header, label %else
+
+else:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ 10, %else ], [ %iv.next, %loop.header ]
+ %max = phi i32 [ 0, %entry ], [ 5, %else ], [ %max.next, %loop.header ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, %n
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret i32 %max.next
+}
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index eabb697..3b77e49 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
; This tests that allocas are not split into slices that are not byte width multiple
+;.
+; CHECK: @foo_copy_source = external constant %union.Foo
+; CHECK: @i64_sink = global i64 0
+;.
define void @no_split_on_non_byte_width(i32) {
; CHECK-LABEL: @no_split_on_non_byte_width(
; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8
@@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr)
; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
; vector store, hence accidentally putting gibberish onto the stack.
-define i32 @memcpy_vec3float_widening(ptr %x) {
+define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 {
; CHECK-LABEL: @memcpy_vec3float_widening(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
+; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
@@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() {
%L2 = load i1, ptr %A
ret i1 %L2
}
+!0 = !{!"function_entry_count", i32 10}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-MODIFY-CFG: {{.*}}
; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index de83a0d..4c08b57 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -386,7 +386,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
// Make a DWARF transformer object and populate the ranges of the code
// so we don't end up adding invalid functions to GSYM data.
- DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
+ bool IsMachO = dyn_cast<object::MachOObjectFile>(&Obj) != nullptr;
+
+ DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites, IsMachO);
if (!TextRanges.empty())
Gsym.SetValidTextRanges(TextRanges);
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 33f53de..d560073 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -4899,3 +4899,189 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
for (const auto &Line : ExpectedDumpLines)
EXPECT_TRUE(DumpStr.find(Line) != std::string::npos);
}
+
+TEST(GSYMTest, TestUnableToLocateDWO) {
+ // Test that llvm-gsymutil will not produce "unable to locate DWO file" for
+ // Apple binaries. Apple uses DW_AT_GNU_dwo_id for non split DWARF purposes
+ // and this makes llvm-gsymutil create warnings and errors.
+ //
+ // 0x0000000b: DW_TAG_compile_unit
+ // DW_AT_name ("main.cpp")
+ // DW_AT_language (DW_LANG_C)
+ // DW_AT_GNU_dwo_id (0xfffffffe)
+ StringRef yamldata = R"(
+ debug_str:
+ - ''
+ - main.cpp
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Attribute: DW_AT_GNU_dwo_id
+ Form: DW_FORM_data4
+ debug_info:
+ - Length: 0x11
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0xFFFFFFFE
+ )";
+ auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+ ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+ std::unique_ptr<DWARFContext> DwarfContext =
+ DWARFContext::create(*ErrOrSections, 8);
+ ASSERT_TRUE(DwarfContext.get() != nullptr);
+ std::string errors;
+ raw_string_ostream OS(errors);
+ OutputAggregator OSAgg(&OS);
+ GsymCreator GC;
+ // Make a DWARF transformer that is MachO (Apple) to avoid warnings about
+ // not finding DWO files.
+ DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true);
+ const uint32_t ThreadCount = 1;
+ ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
+ ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded());
+
+ // Make sure this warning is not in the output
+ std::string warn("warning: Unable to retrieve DWO .debug_info section for");
+ EXPECT_TRUE(errors.find(warn) == std::string::npos);
+}
+
+TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
+ // Test that if llvm-gsymutil finds a line table for a compile unit and if
+ // there are no matching entries for a function in that compile unit, that
+ // it doesn't print out an error saying that a DIE has an invalid file index
+ // if there is no DW_AT_decl_file attribute.
+ //
+ // 0x0000000b: DW_TAG_compile_unit
+ // DW_AT_name ("main.cpp")
+ // DW_AT_language (DW_LANG_C)
+ // DW_AT_stmt_list (0x00000000)
+ //
+ // 0x00000015: DW_TAG_subprogram
+ // DW_AT_name ("foo")
+ // DW_AT_low_pc (0x0000000000001000)
+ // DW_AT_high_pc (0x0000000000001050)
+ //
+ // 0x0000002a: NULL
+ //
+ // Line table that has entries, but none that match "foo":
+ //
+ // Address Line Column File ISA Discriminator OpIndex Flags
+ // ------------------ ------ ------ ------ --- ------------- ------- -----
+ // 0x0000000000002000 10 0 1 0 0 0 is_stmt
+ // 0x0000000000002050 13 0 1 0 0 0 is_stmt
+
+ StringRef yamldata = R"(
+ debug_str:
+ - ''
+ - main.cpp
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Attribute: DW_AT_stmt_list
+ Form: DW_FORM_sec_offset
+ - Code: 0x2
+ Tag: DW_TAG_subprogram
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_string
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_addr
+ debug_info:
+ - Length: 0x27
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0x0
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0xDEADBEEFDEADBEEF
+ CStr: foo
+ - Value: 0x1000
+ - Value: 0x1050
+ - AbbrCode: 0x0
+ debug_line:
+ - Length: 58
+ Version: 2
+ PrologueLength: 31
+ MinInstLength: 1
+ DefaultIsStmt: 1
+ LineBase: 251
+ LineRange: 14
+ OpcodeBase: 13
+ StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+ Files:
+ - Name: main.cpp
+ DirIdx: 0
+ ModTime: 0
+ Length: 0
+ Opcodes:
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 9
+ SubOpcode: DW_LNE_set_address
+ Data: 8192
+ - Opcode: DW_LNS_advance_line
+ SData: 9
+ Data: 0
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 80
+ - Opcode: DW_LNS_advance_line
+ SData: 3
+ Data: 0
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 1
+ SubOpcode: DW_LNE_end_sequence
+ Data: 0
+ )";
+ auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+ ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+ std::unique_ptr<DWARFContext> DwarfContext =
+ DWARFContext::create(*ErrOrSections, 8);
+ ASSERT_TRUE(DwarfContext.get() != nullptr);
+ std::string errors;
+ raw_string_ostream OS(errors);
+ OutputAggregator OSAgg(&OS);
+ GsymCreator GC;
+ DwarfTransformer DT(*DwarfContext, GC);
+ const uint32_t ThreadCount = 1;
+ ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
+ ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded());
+
+ // Make sure this error is not in the output
+ std::string error_str("error: function DIE at 0x00000015 has an invalid file "
+ "index 4294967295 in its DW_AT_decl_file attribute");
+ EXPECT_TRUE(errors.find(error_str) == std::string::npos);
+}
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index b4d816e..3c6ff11 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -266,10 +266,9 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
return;
StringRef Lang = DirLang.getName();
+ IncludeGuardEmitter IncGuard(OS, (Twine("LLVM_") + Lang + "_INC").str());
- OS << "#ifndef LLVM_" << Lang << "_INC\n";
- OS << "#define LLVM_" << Lang << "_INC\n";
- OS << "\n#include \"llvm/ADT/ArrayRef.h\"\n";
+ OS << "#include \"llvm/ADT/ArrayRef.h\"\n";
if (DirLang.hasEnableBitmaskEnumInNamespace())
OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n";
@@ -370,7 +369,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
OS << "};\n";
}
LlvmNS.close();
- OS << "#endif // LLVM_" << Lang << "_INC\n";
}
// Given a list of spellings (for a given clause/directive), order them
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index a5c5426..3f8be5e 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -1310,14 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SROA/phi-gep.ll
-Transforms/SROA/scalable-vectors-with-known-vscale.ll
-Transforms/SROA/select-gep.ll
-Transforms/SROA/select-load.ll
-Transforms/SROA/slice-width.ll
-Transforms/SROA/vector-conversion.ll
-Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
-Transforms/SROA/vector-promotion.ll
Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
Transforms/StructurizeCFG/hoist-zerocost.ll
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index c477c6c..dcc1ef9 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -315,7 +315,8 @@ bool mlir::linalg::detail::isContractionBody(
Value yielded = getSourceSkipUnary(terminator->getOperand(0));
Operation *reductionOp = yielded.getDefiningOp();
- if (reductionOp->getNumResults() != 1 || reductionOp->getNumOperands() != 2) {
+ if (!reductionOp || reductionOp->getNumResults() != 1 ||
+ reductionOp->getNumOperands() != 2) {
errs << "expected reduction op to be binary";
return false;
}
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index 618ba34..66cae5c 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } {
} -> tensor<1x1x4xf32>
return
}
+
+ func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) {
+ %0 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+ affine_map<(d0, d1, d2) -> (d2, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>)
+ outs(%arg2 : tensor<128x128xi32>) {
+ ^bb0(%in: i32, %in_0: i32, %out: i32):
+ linalg.yield %out : i32
+ } -> tensor<128x128xi32>
+ return
+ }
}
// -----
diff --git a/orc-rt/include/orc-rt/ExecutorAddress.h b/orc-rt/include/orc-rt/ExecutorAddress.h
index cc7bbf5..6ec583f 100644
--- a/orc-rt/include/orc-rt/ExecutorAddress.h
+++ b/orc-rt/include/orc-rt/ExecutorAddress.h
@@ -204,6 +204,9 @@ struct ExecutorAddrRange {
constexpr bool contains(ExecutorAddr Addr) const noexcept {
return Start <= Addr && Addr < End;
}
+ constexpr bool contains(const ExecutorAddrRange &Other) const noexcept {
+ return (Other.Start >= Start && Other.End <= End);
+ }
constexpr bool overlaps(const ExecutorAddrRange &Other) const noexcept {
return !(Other.End <= Start || End <= Other.Start);
}
diff --git a/orc-rt/unittests/ExecutorAddressTest.cpp b/orc-rt/unittests/ExecutorAddressTest.cpp
index 98074a7..2e04901 100644
--- a/orc-rt/unittests/ExecutorAddressTest.cpp
+++ b/orc-rt/unittests/ExecutorAddressTest.cpp
@@ -97,10 +97,16 @@ TEST(ExecutorAddrTest, AddrRanges) {
EXPECT_FALSE(R1.contains(A0));
EXPECT_FALSE(R1.contains(A2));
+ EXPECT_TRUE(R3.contains(R0)); // True for singleton range at start.
+ EXPECT_TRUE(R3.contains(R1)); // True for singleton range at end.
+ EXPECT_FALSE(R3.contains(R2)); // False for non-overlapping singleton range.
+ EXPECT_FALSE(R3.contains(R4)); // False for overlapping, uncontained range.
+
EXPECT_FALSE(R1.overlaps(R0));
EXPECT_FALSE(R1.overlaps(R2));
EXPECT_TRUE(R1.overlaps(R3));
EXPECT_TRUE(R1.overlaps(R4));
+ EXPECT_TRUE(R3.overlaps(R4));
}
TEST(ExecutorAddrTest, Hashable) {
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c83ab59..74d632b 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1412,6 +1412,13 @@ td_library(
)
td_library(
+ name = "InferStridedMetadataInterfaceTdFiles",
+ srcs = ["include/mlir/Interfaces/InferStridedMetadataInterface.td"],
+ includes = ["include"],
+ deps = [":OpBaseTdFiles"],
+)
+
+td_library(
name = "InferTypeOpInterfaceTdFiles",
srcs = ["include/mlir/Interfaces/InferTypeOpInterface.td"],
includes = ["include"],
@@ -4159,6 +4166,7 @@ cc_library(
":MathToLibm",
":MathToROCDL",
":MathToSPIRV",
+ ":MathToXeVM",
":MemRefToEmitC",
":MemRefToLLVM",
":MemRefToSPIRV",
@@ -7049,6 +7057,33 @@ cc_library(
)
cc_library(
+ name = "MathToXeVM",
+ srcs = glob([
+ "lib/Conversion/MathToXeVM/*.cpp",
+ ]),
+ hdrs = glob([
+ "include/mlir/Conversion/MathToXeVM/*.h",
+ ]),
+ includes = [
+ "include",
+ "lib/Conversion/MathToXeVM",
+ ],
+ deps = [
+ ":ArithAttrToLLVMConversion",
+ ":ArithDialect",
+ ":ConversionPassIncGen",
+ ":IR",
+ ":LLVMCommonConversion",
+ ":LLVMDialect",
+ ":MathDialect",
+ ":Pass",
+ ":Transforms",
+ ":XeVMDialect",
+ "//llvm:Support",
+ ],
+)
+
+cc_library(
name = "FuncToEmitC",
srcs = glob([
"lib/Conversion/FuncToEmitC/*.cpp",
@@ -7622,6 +7657,30 @@ cc_library(
],
)
+gentbl_cc_library(
+ name = "InferStridedMetadataInterfaceIncGen",
+ tbl_outs = {
+ "include/mlir/Interfaces/InferStridedMetadataInterface.h.inc": ["-gen-op-interface-decls"],
+ "include/mlir/Interfaces/InferStridedMetadataInterface.cpp.inc": ["-gen-op-interface-defs"],
+ },
+ tblgen = ":mlir-tblgen",
+ td_file = "include/mlir/Interfaces/InferStridedMetadataInterface.td",
+ deps = [":InferStridedMetadataInterfaceTdFiles"],
+)
+
+cc_library(
+ name = "InferStridedMetadataInterface",
+ srcs = ["lib/Interfaces/InferStridedMetadataInterface.cpp"],
+ hdrs = ["include/mlir/Interfaces/InferStridedMetadataInterface.h"],
+ includes = ["include"],
+ deps = [
+ ":IR",
+ ":InferIntRangeInterface",
+ ":InferStridedMetadataInterfaceIncGen",
+ "//llvm:Support",
+ ],
+)
+
td_library(
name = "DataLayoutInterfacesTdFiles",
srcs = ["include/mlir/Interfaces/DataLayoutInterfaces.td"],
@@ -8550,9 +8609,11 @@ cc_library(
":CallOpInterfaces",
":ControlFlowInterfaces",
":DataLayoutInterfaces",
+ ":DialectUtils",
":FunctionInterfaces",
":IR",
":InferIntRangeInterface",
+ ":InferStridedMetadataInterface",
":LoopLikeInterface",
":Pass",
":SideEffectInterfaces",
@@ -12474,6 +12535,7 @@ cc_library(
":IR",
":InferIntRangeCommon",
":InferIntRangeInterface",
+ ":InferStridedMetadataInterface",
":InferTypeOpInterface",
":InliningUtils",
":Pass",
@@ -12695,6 +12757,7 @@ td_library(
":ArithOpsTdFiles",
":CastInterfacesTdFiles",
":ControlFlowInterfacesTdFiles",
+ ":InferStridedMetadataInterfaceTdFiles",
":MemOpInterfacesTdFiles",
":MemorySlotInterfacesTdFiles",
":OpBaseTdFiles",
@@ -12785,6 +12848,7 @@ cc_library(
":IR",
":InferIntRangeCommon",
":InferIntRangeInterface",
+ ":InferStridedMetadataInterface",
":InferTypeOpInterface",
":InliningUtils",
":MemOpInterfaces",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel
index b00e8f2..d8fcb53 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel
@@ -1,4 +1,5 @@
load("//llvm:lit_test.bzl", "lit_test")
+load("//llvm:targets.bzl", "llvm_targets")
licenses(["notice"])
@@ -15,6 +16,9 @@ package(default_visibility = ["//visibility:public"])
)
for src in glob(
include = ["**/*.mlir"],
- exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"],
+ exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"] + (
+ # MathToXeVM needs SPIRV; see MathToXeVM/lit.local.cfg
+ ["MathToXeVM/**"] if "SPIRV" not in llvm_targets else []
+ ),
)
]