diff options
88 files changed, 2696 insertions, 595 deletions
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py index 5026c29..36c9585 100644 --- a/.ci/generate_test_report_lib.py +++ b/.ci/generate_test_report_lib.py @@ -98,6 +98,23 @@ def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]: ) return output +def get_failures(junit_objects) -> dict[str, list[tuple[str, str]]]: + failures = {} + for results in junit_objects: + for testsuite in results: + for test in testsuite: + if ( + not test.is_passed + and test.result + and isinstance(test.result[0], Failure) + ): + if failures.get(testsuite.name) is None: + failures[testsuite.name] = [] + failures[testsuite.name].append( + (test.classname + "/" + test.name, test.result[0].text) + ) + return failures + # Set size_limit to limit the byte size of the report. The default is 1MB as this # is the most that can be put into an annotation. If the generated report exceeds @@ -113,7 +130,7 @@ def generate_report( size_limit=1024 * 1024, list_failures=True, ): - failures = {} + failures = get_failures(junit_objects) tests_run = 0 tests_skipped = 0 tests_failed = 0 @@ -124,18 +141,6 @@ def generate_report( tests_skipped += testsuite.skipped tests_failed += testsuite.failures - for test in testsuite: - if ( - not test.is_passed - and test.result - and isinstance(test.result[0], Failure) - ): - if failures.get(testsuite.name) is None: - failures[testsuite.name] = [] - failures[testsuite.name].append( - (test.classname + "/" + test.name, test.result[0].text) - ) - report = [f"# {title}", ""] if tests_run == 0: @@ -258,7 +263,7 @@ def generate_report( return report -def generate_report_from_files(title, return_code, build_log_files): +def load_info_from_files(build_log_files): junit_files = [ junit_file for junit_file in build_log_files if junit_file.endswith(".xml") ] @@ -271,6 +276,9 @@ def generate_report_from_files(title, return_code, build_log_files): ninja_logs.append( [log_line.strip() for log_line in ninja_log_file_handle.readlines()] ) - return generate_report( - title, return_code, [JUnitXml.fromfile(p) for p in junit_files], ninja_logs - ) + return [JUnitXml.fromfile(p) for p in junit_files], ninja_logs + + +def generate_report_from_files(title, return_code, build_log_files): + junit_objects, ninja_logs = load_info_from_files(build_log_files) + return generate_report(title, return_code, junit_objects, ninja_logs) diff --git a/clang/docs/PointerAuthentication.rst b/clang/docs/PointerAuthentication.rst index 96eb498..7e65f4b 100644 --- a/clang/docs/PointerAuthentication.rst +++ b/clang/docs/PointerAuthentication.rst @@ -592,6 +592,36 @@ The result value is never zero and always within range for both the This can be used in constant expressions. +``ptrauth_type_discriminator`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + ptrauth_type_discriminator(type) + +Compute the constant discriminator derived from the given type, as is computed +for automatically type diversified schemas. + +``type`` must be a type. The result has the type ``ptrauth_extra_data_t``. + +This can be used in constant expressions. + +``ptrauth_function_pointer_type_discriminator`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + ptrauth_function_pointer_type_discriminator(function_type) + +Compute the constant discriminator derived from the provided function type, for +use in contexts where the default function authentication schema. If function +pointer type diversity is enabled, this is equivalent to +`ptrauth_type_discriminator(function_type)`, if it is not enabled this is `0`. + +``function_type`` must be a function type. The result has the type ``ptrauth_extra_data_t``. + +This can be used in constant expressions. + ``ptrauth_strip`` ^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index edb872c..79ff821 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -191,6 +191,8 @@ C23 Feature Support - Added ``FLT_SNAN``, ``DBL_SNAN``, and ``LDBL_SNAN`` to Clang's ``<float.h>`` header in C23 and later modes. This implements `WG14 N2710 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2710.htm>`_. +- Fixed accepting as compatible unnamed tag types with the same fields within + the same translation unit but from different types. Non-comprehensive list of changes in this release ------------------------------------------------- diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a8b41ba..3603e9cd 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11581,6 +11581,12 @@ QualType ASTContext::mergeTagDefinitions(QualType LHS, QualType RHS) { if (LangOpts.CPlusPlus || !LangOpts.C23) return {}; + // Nameless tags are comparable only within outer definitions. At the top + // level they are not comparable. + const TagDecl *LTagD = LHS->castAsTagDecl(), *RTagD = RHS->castAsTagDecl(); + if (!LTagD->getIdentifier() || !RTagD->getIdentifier()) + return {}; + // C23, on the other hand, requires the members to be "the same enough", so // we use a structural equivalence check. StructuralEquivalenceContext::NonEquivalentDeclSet NonEquivalentDecls; diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 1557346..b17cd6f 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1763,19 +1763,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, // another anonymous structure or union, respectively, if their members // fulfill the preceding requirements. ... Otherwise, the structure, union, // or enumerated types are incompatible. - - // Note: "the same tag" refers to the identifier for the structure; two - // structures without names are not compatible within a TU. In C23, if either - // declaration has no name, they're not equivalent. However, the paragraph - // after the bulleted list goes on to talk about compatibility of anonymous - // structure and union members, so this prohibition only applies to top-level - // declarations; if either declaration is not a member, they cannot be - // compatible. - if (Context.LangOpts.C23 && (!D1->getIdentifier() || !D2->getIdentifier()) && - (!D1->getDeclContext()->isRecord() || !D2->getDeclContext()->isRecord())) - return false; - - // Otherwise, check the names for equivalence. if (!NameIsStructurallyEquivalent(*D1, *D2)) return false; diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index cfd48a2..5ba64dd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -536,7 +536,7 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) { mlir::Block *currBlock = builder.getBlock(); mlir::Block *labelBlock = currBlock; - if (!currBlock->empty()) { + if (!currBlock->empty() || currBlock->isEntryBlock()) { { mlir::OpBuilder::InsertionGuard guard(builder); labelBlock = builder.createBlock(builder.getBlock()->getParent()); diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index f902ca1..ad28f06 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define ptrauth_type_discriminator(__type) \ __builtin_ptrauth_type_discriminator(__type) +/* Compute the constant discriminator used by Clang to sign pointers with the + given C function pointer type. + + A call to this function is an integer constant expression. */ +#if __has_feature(ptrauth_function_pointer_type_discrimination) +#define ptrauth_function_pointer_type_discriminator(__type) \ + __builtin_ptrauth_type_discriminator(__type) +#else +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) +#endif + /* Compute a signature for the given pair of pointer-sized values. The order of the arguments is significant. @@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; }) #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0) +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) #define ptrauth_sign_generic_data(__value, __data) \ ({ \ diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 66cfccb..c1a5000 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -26,6 +26,7 @@ bool tryToFindPtrOrigin( const Expr *E, bool StopAtFirstRefCountedObj, std::function<bool(const clang::CXXRecordDecl *)> isSafePtr, std::function<bool(const clang::QualType)> isSafePtrType, + std::function<bool(const clang::Decl *)> isSafeGlobalDecl, std::function<bool(const clang::Expr *, bool)> callback) { while (E) { if (auto *DRE = dyn_cast<DeclRefExpr>(E)) { @@ -34,6 +35,8 @@ bool tryToFindPtrOrigin( auto IsImmortal = safeGetName(VD) == "NSApp"; if (VD->hasGlobalStorage() && (IsImmortal || QT.isConstQualified())) return callback(E, true); + if (VD->hasGlobalStorage() && isSafeGlobalDecl(VD)) + return callback(E, true); } } if (auto *tempExpr = dyn_cast<MaterializeTemporaryExpr>(E)) { @@ -71,9 +74,11 @@ bool tryToFindPtrOrigin( } if (auto *Expr = dyn_cast<ConditionalOperator>(E)) { return tryToFindPtrOrigin(Expr->getTrueExpr(), StopAtFirstRefCountedObj, - isSafePtr, isSafePtrType, callback) && + isSafePtr, isSafePtrType, isSafeGlobalDecl, + callback) && tryToFindPtrOrigin(Expr->getFalseExpr(), StopAtFirstRefCountedObj, - isSafePtr, isSafePtrType, callback); + isSafePtr, isSafePtrType, isSafeGlobalDecl, + callback); } if (auto *cast = dyn_cast<CastExpr>(E)) { if (StopAtFirstRefCountedObj) { @@ -93,7 +98,8 @@ bool tryToFindPtrOrigin( if (auto *call = dyn_cast<CallExpr>(E)) { if (auto *Callee = call->getCalleeDecl()) { if (Callee->hasAttr<CFReturnsRetainedAttr>() || - Callee->hasAttr<NSReturnsRetainedAttr>()) { + Callee->hasAttr<NSReturnsRetainedAttr>() || + Callee->hasAttr<NSReturnsAutoreleasedAttr>()) { return callback(E, true); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h index 3a009d6..9fff456 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h @@ -56,6 +56,7 @@ bool tryToFindPtrOrigin( const clang::Expr *E, bool StopAtFirstRefCountedObj, std::function<bool(const clang::CXXRecordDecl *)> isSafePtr, std::function<bool(const clang::QualType)> isSafePtrType, + std::function<bool(const clang::Decl *)> isSafeGlobalDecl, std::function<bool(const clang::Expr *, bool)> callback); /// For \p E referring to a ref-countable/-counted pointer/reference we return diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp index 9585ceb..791e709 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp @@ -29,12 +29,12 @@ namespace { class RawPtrRefCallArgsChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { BugType Bug; - mutable BugReporter *BR; TrivialFunctionAnalysis TFA; EnsureFunctionAnalysis EFA; protected: + mutable BugReporter *BR; mutable std::optional<RetainTypeChecker> RTC; public: @@ -46,6 +46,7 @@ public: virtual bool isSafePtr(const CXXRecordDecl *Record) const = 0; virtual bool isSafePtrType(const QualType type) const = 0; virtual bool isSafeExpr(const Expr *) const { return false; } + virtual bool isSafeDecl(const Decl *) const { return false; } virtual const char *ptrKind() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -214,6 +215,7 @@ public: Arg, /*StopAtFirstRefCountedObj=*/true, [&](const clang::CXXRecordDecl *Record) { return isSafePtr(Record); }, [&](const clang::QualType T) { return isSafePtrType(T); }, + [&](const clang::Decl *D) { return isSafeDecl(D); }, [&](const clang::Expr *ArgOrigin, bool IsSafe) { if (IsSafe) return true; @@ -479,6 +481,11 @@ public: isa<ObjCMessageExpr>(E); } + bool isSafeDecl(const Decl *D) const final { + // Treat NS/CF globals in system header as immortal. + return BR->getSourceManager().isInSystemHeader(D->getLocation()); + } + const char *ptrKind() const final { return "unretained"; } }; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp index dd9701f..c13df479 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp @@ -166,10 +166,10 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, class RawPtrRefLocalVarsChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { BugType Bug; - mutable BugReporter *BR; EnsureFunctionAnalysis EFA; protected: + mutable BugReporter *BR; mutable std::optional<RetainTypeChecker> RTC; public: @@ -180,6 +180,7 @@ public: virtual bool isSafePtr(const CXXRecordDecl *) const = 0; virtual bool isSafePtrType(const QualType) const = 0; virtual bool isSafeExpr(const Expr *) const { return false; } + virtual bool isSafeDecl(const Decl *) const { return false; } virtual const char *ptrKind() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -288,6 +289,7 @@ public: return isSafePtr(Record); }, [&](const clang::QualType Type) { return isSafePtrType(Type); }, + [&](const clang::Decl *D) { return isSafeDecl(D); }, [&](const clang::Expr *InitArgOrigin, bool IsSafe) { if (!InitArgOrigin || IsSafe) return true; @@ -443,6 +445,10 @@ public: return ento::cocoa::isCocoaObjectRef(E->getType()) && isa<ObjCMessageExpr>(E); } + bool isSafeDecl(const Decl *D) const final { + // Treat NS/CF globals in system header as immortal. + return BR->getSourceManager().isInSystemHeader(D->getLocation()); + } const char *ptrKind() const final { return "unretained"; } }; diff --git a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h index 1e44de8..d55b3ab 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h @@ -34,6 +34,8 @@ void os_log_msg(os_log_t oslog, os_log_type_t type, const char *msg, ...); typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStringRef; +extern CFStringRef const kCFURLTagNamesKey; + #ifdef __OBJC__ @class NSString; @interface SystemObject { @@ -41,4 +43,8 @@ typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStrin CFStringRef cf_string; } @end + +typedef NSString *NSNotificationName; +extern "C" NSNotificationName NSApplicationDidBecomeActiveNotification; + #endif diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm index a517dbc..5dc3b38 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm @@ -567,6 +567,17 @@ struct Derived : Base { } // namespace ns_retained_return_value +namespace autoreleased { + +NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased)); +void consume(NSString *); + +void foo() { + consume(provideAutoreleased()); +} + +} // autoreleased + @interface TestObject : NSObject - (void)doWork:(NSString *)msg, ...; - (void)doWorkOnSelf; diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm index 307a4d03..f49e7bd 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm @@ -1,8 +1,11 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedLocalVarsChecker -verify %s #import "objc-mock-types.h" +#import "mock-system-header.h" void someFunction(); +extern "C" CFStringRef LocalGlobalCFString; +extern "C" NSString *LocalGlobalNSString; namespace raw_ptr { void foo() { @@ -535,6 +538,41 @@ unsigned foo() { } // namespace ns_retained_return_value +namespace autoreleased { + +NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased)); +void consume(NSString *); + +void foo() { + auto *string = provideAutoreleased(); + consume(string); +} + +} // autoreleased + +namespace ns_global { + +void consumeCFString(CFStringRef); +void consumeNSString(NSString *); + +void cf() { + auto *str = kCFURLTagNamesKey; + consumeCFString(str); + auto *localStr = LocalGlobalCFString; + // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}} + consumeCFString(localStr); +} + +void ns() { + auto *str = NSApplicationDidBecomeActiveNotification; + consumeNSString(str); + auto *localStr = LocalGlobalNSString; + // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}} + consumeNSString(localStr); +} + +} + bool doMoreWorkOpaque(OtherObj*); SomeObj* provide(); diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm new file mode 100644 index 0000000..5c78b21 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm @@ -0,0 +1,18 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedCallArgsChecker -verify %s + +#import "mock-types.h" +#import "mock-system-header.h" + +void consumeCFString(CFStringRef); +extern "C" CFStringRef LocalGlobalCFString; +void consumeNSString(NSString *); +extern "C" NSString *LocalGlobalNSString; + +void foo() { + consumeCFString(kCFURLTagNamesKey); + consumeCFString(LocalGlobalCFString); + // expected-warning@-1{{Call argument is unretained and unsafe}} + consumeNSString(NSApplicationDidBecomeActiveNotification); + consumeNSString(LocalGlobalNSString); + // expected-warning@-1{{Call argument is unretained and unsafe}} +} diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c index 3748375..113ecf7 100644 --- a/clang/test/C/C23/n3037.c +++ b/clang/test/C/C23/n3037.c @@ -30,11 +30,24 @@ void func2(PRODUCT(int, SUM(float, double)) y) { // c17-warning {{declaration of struct foop { struct { int x; }; }; // c17-note {{previous definition is here}} struct foop { struct { int x; }; }; // c17-error {{redefinition of 'foop'}} +// Test the field lookup compatibility isn't sufficient, the structure of types should be compatible. +struct AnonymousStructNotMatchingFields { // c17-note {{previous definition is here}} + struct { // c23-note {{field has name '' here}} + int x; + }; +}; +struct AnonymousStructNotMatchingFields { // c23-error {{type 'struct AnonymousStructNotMatchingFields' has incompatible definitions}} \ + c17-error {{redefinition of 'AnonymousStructNotMatchingFields'}} + int x; // c23-note {{field has name 'x' here}} +}; + union barp { int x; float y; }; // c17-note {{previous definition is here}} union barp { int x; float y; }; // c17-error {{redefinition of 'barp'}} typedef struct q { int x; } q_t; // c17-note 2 {{previous definition is here}} typedef struct q { int x; } q_t; // c17-error {{redefinition of 'q'}} \ c17-error-re {{typedef redefinition with different types ('struct (unnamed struct at {{.*}})' vs 'struct q')}} +typedef struct { int x; } untagged_q_t; // both-note {{previous definition is here}} +typedef struct { int x; } untagged_q_t; // both-error {{typedef redefinition with different types}} void func3(void) { struct S { int x; }; // c17-note {{previous definition is here}} struct T { struct S s; }; // c17-note {{previous definition is here}} @@ -389,13 +402,40 @@ void nontag_both_in_params(struct { int i; } Arg1, struct { int i; } Arg2) { _Static_assert(0 == _Generic(__typeof__(Arg1), __typeof__(Arg2) : 1, default : 0)); // both-warning {{passing a type argument as the first operand to '_Generic' is a C2y extension}} } -struct InnerAnonStruct { +struct InnerUnnamedStruct { struct { int i; } untagged; -} inner_anon_tagged; +} inner_unnamed_tagged; +_Static_assert(0 == _Generic(inner_unnamed_tagged.untagged, struct { int i; } : 1, default : 0)); -_Static_assert(0 == _Generic(inner_anon_tagged.untagged, struct { int i; } : 1, default : 0)); +struct InnerUnnamedStruct_same { + struct { + int i; + } untagged; +}; +struct InnerUnnamedStruct_differentNaming { + struct { + int i; + } untaggedDifferent; +}; +struct InnerUnnamedStruct_differentShape { + float x; + struct { + int i; + } untagged; + int y; +}; +void compare_unnamed_struct_from_different_outer_type( + struct InnerUnnamedStruct sameOuterType, + struct InnerUnnamedStruct_same matchingType, + struct InnerUnnamedStruct_differentNaming differentFieldName, + struct InnerUnnamedStruct_differentShape differentType) { + inner_unnamed_tagged.untagged = sameOuterType.untagged; + inner_unnamed_tagged.untagged = matchingType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} + inner_unnamed_tagged.untagged = differentFieldName.untaggedDifferent; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} + inner_unnamed_tagged.untagged = differentType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} +} // Test the same thing with enumerations (test for unions is omitted because // unions and structures are both RecordDecl objects, whereas EnumDecl is not). diff --git a/clang/test/CIR/CodeGen/goto.cpp b/clang/test/CIR/CodeGen/goto.cpp index 48cb44e..257c255 100644 --- a/clang/test/CIR/CodeGen/goto.cpp +++ b/clang/test/CIR/CodeGen/goto.cpp @@ -205,6 +205,8 @@ extern "C" void case_follow_label(int v) { // CIR: cir.func dso_local @case_follow_label // CIR: cir.switch // CIR: cir.case(equal, [#cir.int<1> : !s32i]) { +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "label" // CIR: cir.case(equal, [#cir.int<2> : !s32i]) { // CIR: cir.call @action1() @@ -215,9 +217,11 @@ extern "C" void case_follow_label(int v) { // LLVM: define dso_local void @case_follow_label // LLVM: switch i32 {{.*}}, label %[[SWDEFAULT:.*]] [ -// LLVM: i32 1, label %[[LABEL:.*]] +// LLVM: i32 1, label %[[CASE1:.*]] // LLVM: i32 2, label %[[CASE2:.*]] // LLVM: ] +// LLVM: [[CASE1]]: +// LLVM: br label %[[LABEL:.*]] // LLVM: [[LABEL]]: // LLVM: br label %[[CASE2]] // LLVM: [[CASE2]]: @@ -303,3 +307,24 @@ extern "C" void default_follow_label(int v) { // OGCG: br label %label // OGCG: sw.epilog: // OGCG: ret void + +void g3() { +label: + goto label; +} + +// CIR: cir.func dso_local @_Z2g3v +// CIR: cir.br ^bb1 +// CIR: ^bb1: +// CIR: cir.label "label" +// CIR: cir.goto "label" + +// LLVM: define dso_local void @_Z2g3v() +// LLVM: br label %1 +// LLVM: 1: +// LLVM: br label %1 + +// OGCG: define dso_local void @_Z2g3v() +// OGCG: br label %label +// OGCG: label: +// OGCG: br label %label diff --git a/clang/test/CIR/CodeGen/label.c b/clang/test/CIR/CodeGen/label.c index a050094..f5345ef 100644 --- a/clang/test/CIR/CodeGen/label.c +++ b/clang/test/CIR/CodeGen/label.c @@ -11,10 +11,14 @@ labelA: } // CIR: cir.func no_proto dso_local @label +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "labelA" // CIR: cir.return // LLVM:define dso_local void @label +// LLVM: br label %1 +// LLVM: 1: // LLVM: ret void // OGCG: define dso_local void @label @@ -29,15 +33,19 @@ labelC: } // CIR: cir.func no_proto dso_local @multiple_labels -// CIR: cir.label "labelB" // CIR: cir.br ^bb1 -// CIR: ^bb1: // pred: ^bb0 +// CIR: ^bb1: +// CIR: cir.label "labelB" +// CIR: cir.br ^bb2 +// CIR: ^bb2: // CIR: cir.label "labelC" // CIR: cir.return // LLVM: define dso_local void @multiple_labels() // LLVM: br label %1 // LLVM: 1: +// LLVM: br label %2 +// LLVM: 2: // LLVM: ret void // OGCG: define dso_local void @multiple_labels @@ -56,6 +64,8 @@ labelD: // CIR: cir.func dso_local @label_in_if // CIR: cir.if {{.*}} { +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "labelD" // CIR: [[LOAD:%.*]] = cir.load align(4) [[COND:%.*]] : !cir.ptr<!s32i>, !s32i // CIR: [[INC:%.*]] = cir.unary(inc, %3) nsw : !s32i, !s32i @@ -68,15 +78,17 @@ labelD: // LLVM: 3: // LLVM: [[LOAD:%.*]] = load i32, ptr [[COND:%.*]], align 4 // LLVM: [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0 -// LLVM: br i1 [[CMP]], label %6, label %9 +// LLVM: br i1 [[CMP]], label %6, label %10 // LLVM: 6: +// LLVM: br label %7 +// LLVM: 7: // LLVM: [[LOAD2:%.*]] = load i32, ptr [[COND]], align 4 // LLVM: [[ADD1:%.*]] = add nsw i32 [[LOAD2]], 1 // LLVM: store i32 [[ADD1]], ptr [[COND]], align 4 -// LLVM: br label %9 -// LLVM: 9: // LLVM: br label %10 // LLVM: 10: +// LLVM: br label %11 +// LLVM: 11: // LLVM: ret void // OGCG: define dso_local void @label_in_if @@ -142,11 +154,15 @@ end: return; } // CIR: cir.func no_proto dso_local @labelWithoutMatch +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "end" // CIR: cir.return // CIR: } // LLVM: define dso_local void @labelWithoutMatch +// LLVM: br label %1 +// LLVM: 1: // LLVM: ret void // OGCG: define dso_local void @labelWithoutMatch @@ -167,13 +183,17 @@ void foo() { // CIR: cir.func no_proto dso_local @foo // CIR: cir.scope { -// CIR: cir.label "label" // CIR: %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["agg.tmp0"] +// CIR: cir.br ^bb1 +// CIR: ^bb1: +// CIR: cir.label "label" // LLVM:define dso_local void @foo() { // LLVM: [[ALLOC:%.*]] = alloca %struct.S, i64 1, align 1 // LLVM: br label %2 // LLVM:2: +// LLVM: br label %3 +// LLVM:3: // LLVM: [[CALL:%.*]] = call %struct.S @get() // LLVM: store %struct.S [[CALL]], ptr [[ALLOC]], align 1 // LLVM: [[LOAD:%.*]] = load %struct.S, ptr [[ALLOC]], align 1 diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl deleted file mode 100644 index f48521b..0000000 --- a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl +++ /dev/null @@ -1,70 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL -// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPIRV - -// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", i16, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i16, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.2" = type { target("dx.TypedBuffer", i32, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.3" = type { target("dx.TypedBuffer", i64, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.4" = type { target("dx.TypedBuffer", i64, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.5" = type { target("dx.TypedBuffer", half, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.6" = type { target("dx.TypedBuffer", float, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.7" = type { target("dx.TypedBuffer", double, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.8" = type { target("dx.TypedBuffer", <4 x i16>, 1, 0, 1) } -// DXIL: %"class.hlsl::RWBuffer.9" = type { target("dx.TypedBuffer", <3 x i32>, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.10" = type { target("dx.TypedBuffer", <2 x half>, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.11" = type { target("dx.TypedBuffer", <3 x float>, 1, 0, 0) } -// DXIL: %"class.hlsl::RWBuffer.12" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 1) } - -// SPIRV: %"class.hlsl::RWBuffer" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) } -// SPIRV: %"class.hlsl::RWBuffer.2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) } -// SPIRV: %"class.hlsl::RWBuffer.3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) } -// SPIRV: %"class.hlsl::RWBuffer.4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) } -// SPIRV: %"class.hlsl::RWBuffer.5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) } -// SPIRV: %"class.hlsl::RWBuffer.7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) } -// SPIRV: %"class.hlsl::RWBuffer.12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) } - -RWBuffer<int16_t> BufI16; -RWBuffer<uint16_t> BufU16; -RWBuffer<int> BufI32; -RWBuffer<uint> BufU32; -RWBuffer<int64_t> BufI64; -RWBuffer<uint64_t> BufU64; -RWBuffer<half> BufF16; -RWBuffer<float> BufF32; -RWBuffer<double> BufF64; -RWBuffer< vector<int16_t, 4> > BufI16x4; -RWBuffer< vector<uint, 3> > BufU32x3; -RWBuffer<half2> BufF16x2; -RWBuffer<float3> BufF32x3; -RWBuffer<int4> BufI32x4; -// TODO: RWBuffer<snorm half> BufSNormF16; -> 11 -// TODO: RWBuffer<unorm half> BufUNormF16; -> 12 -// TODO: RWBuffer<snorm float> BufSNormF32; -> 13 -// TODO: RWBuffer<unorm float> BufUNormF32; -> 14 -// TODO: RWBuffer<snorm double> BufSNormF64; -> 15 -// TODO: RWBuffer<unorm double> BufUNormF64; -> 16 - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { - BufI16[GI] = 0; - BufU16[GI] = 0; - BufI32[GI] = 0; - BufU32[GI] = 0; - BufI64[GI] = 0; - BufU64[GI] = 0; - BufF16[GI] = 0; - BufF32[GI] = 0; - BufF64[GI] = 0; - BufI16x4[GI] = 0; - BufU32x3[GI] = 0; - BufF16x2[GI] = 0; - BufF32x3[GI] = 0; -} diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl deleted file mode 100644 index 0de171c..0000000 --- a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXC,CHECK -// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK - -RWBuffer<int> In; -RWBuffer<int> Out; - -[numthreads(1,1,1)] -void main(unsigned GI : SV_GroupIndex) { - // CHECK: define void @main() - - // DXC: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] - // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] - Out[GI] = In[GI]; - - // DXC: %[[INPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[INPTR:.*]] = call ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] - // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) - // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) - // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] - Out[GI] = In.Load(GI); -} diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl index ca33c42..1ec9f0f 100644 --- a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | \ -// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL // FIXME: SPIR-V codegen of llvm.spv.resource.handlefrombinding and resource types is not yet implemented // RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | \ // llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV @@ -14,7 +14,7 @@ RWBuffer<float> Buf1 : register(u5, space3); // Resource with implicit binding -RWBuffer<double> Buf2; +Buffer<double> Buf2; export void foo() { // Local resource declaration @@ -22,12 +22,12 @@ export void foo() { } // CHECK: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", float, 1, 0, 0) } -// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", double, 1, 0, 0) } -// CHECK: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) } +// CHECK: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", double, 0, 0, 0) } +// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i32, 1, 0, 1) } // CHECK: @Buf1 = internal global %"class.hlsl::RWBuffer" poison, align 4 // CHECK: @[[Buf1Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf1\00", align 1 -// CHECK: @Buf2 = internal global %"class.hlsl::RWBuffer.0" poison, align 4 +// CHECK: @Buf2 = internal global %"class.hlsl::Buffer" poison, align 4 // CHECK: @[[Buf2Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf2\00", align 1 // Buf1 initialization part 1 - global init function that calls RWBuffer<float>::__createFromBinding @@ -50,24 +50,24 @@ export void foo() { // Buf2 initialization part 1 - global init function that RWBuffer<float>::__createFromImplicitBinding // CHECK: define internal void @__cxx_global_var_init.1() // CHECK-NEXT: entry: -// CHECK-NEXT: call void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-NEXT: call void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) // CHECK-SAME: (ptr {{.*}} @Buf2, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 0, ptr noundef @[[Buf2Str]]) -// Buf2 initialization part 2 - body of RWBuffer<float>::__createFromImplicitBinding call -// CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) -// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer.0") align 4 %[[RetValue2:.*]], i32 noundef %orderId, +// Buf2 initialization part 2 - body of Buffer<double>::__createFromImplicitBinding call +// CHECK: define linkonce_odr hidden void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::Buffer") align 4 %[[RetValue2:.*]], i32 noundef %orderId, // CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, ptr noundef %name) -// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::RWBuffer.0", align 4 -// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 1, 0, 0) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_1_0_0t( -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %[[Tmp2]], i32 0, i32 0 -// CHECK-DXIL: store target("dx.TypedBuffer", double, 1, 0, 0) %[[Handle2]], ptr %__handle, align 4 -// CHECK: call void @hlsl::RWBuffer<double>::RWBuffer(hlsl::RWBuffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]]) +// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::Buffer", align 4 +// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 0, 0, 0) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_0_0_0t( +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %[[Tmp2]], i32 0, i32 0 +// CHECK-DXIL: store target("dx.TypedBuffer", double, 0, 0, 0) %[[Handle2]], ptr %__handle, align 4 +// CHECK: call void @hlsl::Buffer<double>::Buffer(hlsl::Buffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]]) // Buf3 initialization part 1 - local variable declared in function foo() is initialized by RWBuffer<int> C1 default constructor // CHECK: define void @foo() // CHECK-NEXT: entry: -// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.1", align 4 +// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.0", align 4 // CHECK-NEXT: call void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %Buf3) // Buf3 initialization part 2 - body of RWBuffer<int> default C1 constructor that calls the default C2 constructor @@ -76,11 +76,11 @@ export void foo() { // Buf3 initialization part 3 - body of RWBuffer<int> default C2 constructor that initializes handle to poison // CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.1", ptr %{{.*}}, i32 0, i32 0 +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %{{.*}}, i32 0, i32 0 // CHECK-NEXT: store target("dx.TypedBuffer", i32, 1, 0, 1) poison, ptr %__handle, align 4 // Module initialization -// CHECK: define internal void @_GLOBAL__sub_I_RWBuffer_constructor.hlsl() +// CHECK: define internal void @_GLOBAL__sub_I_TypedBuffers_constructor.hlsl() // CHECK-NEXT: entry: // CHECK-NEXT: call void @__cxx_global_var_init() // CHECK-NEXT: call void @__cxx_global_var_init.1() diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl new file mode 100644 index 0000000..d3dba8a --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=0 -check-prefixes=DXIL + +// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=1 -check-prefixes=SPV-RO + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer -DRW=1 -check-prefixes=DXIL + +// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer --DRW=2 -check-prefixes=SPV-RW + +// DXIL: %"class.hlsl::[[RESOURCE]]" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.TypedBuffer", half, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.TypedBuffer", float, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.TypedBuffer", double, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.TypedBuffer", <4 x i16>, [[RW]], 0, 1) } +// DXIL: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.TypedBuffer", <3 x i32>, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.TypedBuffer", <2 x half>, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.TypedBuffer", <3 x float>, [[RW]], 0, 0) } +// DXIL: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.TypedBuffer", <4 x i32>, [[RW]], 0, 1) } + +// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) } + +// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) } + +RESOURCE<int16_t> BufI16; +RESOURCE<uint16_t> BufU16; +RESOURCE<int> BufI32; +RESOURCE<uint> BufU32; +RESOURCE<int64_t> BufI64; +RESOURCE<uint64_t> BufU64; +RESOURCE<half> BufF16; +RESOURCE<float> BufF32; +RESOURCE<double> BufF64; +RESOURCE< vector<int16_t, 4> > BufI16x4; +RESOURCE< vector<uint, 3> > BufU32x3; +RESOURCE<half2> BufF16x2; +RESOURCE<float3> BufF32x3; +RESOURCE<int4> BufI32x4; +// TODO: RESOURCE<snorm half> BufSNormF16; -> 11 +// TODO: RESOURCE<unorm half> BufUNormF16; -> 12 +// TODO: RESOURCE<snorm float> BufSNormF32; -> 13 +// TODO: RESOURCE<unorm float> BufUNormF32; -> 14 +// TODO: RESOURCE<snorm double> BufSNormF64; -> 15 +// TODO: RESOURCE<unorm double> BufUNormF64; -> 16 + +[numthreads(1,1,1)] +void main(int GI : SV_GroupIndex) { + int16_t v1 = BufI16[GI]; + uint16_t v2 = BufU16[GI]; + int v3 = BufI32[GI]; + uint v4 = BufU32[GI]; + int64_t v5 = BufI64[GI]; + uint64_t v6 = BufU64[GI]; + half v7 = BufF16[GI]; + float v8 = BufF32[GI]; + double v9 = BufF64[GI]; + vector<int16_t,4> v10 = BufI16x4[GI]; + vector<int, 3> v11 = BufU32x3[GI]; + half2 v12 = BufF16x2[GI]; + float3 v13 = BufF32x3[GI]; +} diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl new file mode 100644 index 0000000..b153bda --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL +// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPIRV + +// NOTE: SPIRV codegen for resource methods is not yet implemented + +Buffer<float> Buf : register(t0); +RWBuffer<uint4> RWBuf : register(u0); + +// DXIL: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", float, 0, 0, 0) } +// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) } + +// DXIL: @Buf = internal global %"class.hlsl::Buffer" poison +// DXIL: @RWBuf = internal global %"class.hlsl::RWBuffer" poison + +export float TestLoad() { + return Buf.Load(1) + RWBuf.Load(2).y; +} + +// CHECK: define noundef nofpclass(nan inf) float @TestLoad()() +// CHECK: call {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} @Buf, i32 noundef 1) +// CHECK: call {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} @RWBuf, i32 noundef 2) +// CHECK: add +// CHECK: ret float + +// CHECK: define {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index) +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %{{.*}}, i32 0, i32 0 +// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", float, 0, 0, 0), ptr %__handle +// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr +// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0) %[[HANDLE]], i32 %[[INDEX]]) +// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]] +// CHECK-NEXT: ret float %[[VAL]] + +// CHECK: define {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index) +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0 +// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), ptr %__handle +// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr +// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %[[HANDLE]], i32 %[[INDEX]]) +// CHECK-NEXT: %[[VEC:.*]] = load <4 x i32>, ptr %[[PTR]] +// CHECK-NEXT: ret <4 x i32> %[[VEC]] + +// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0), i32) +// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), i32) diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl new file mode 100644 index 0000000..adc35f6 --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXIL,CHECK +// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK + +Buffer<int> In; +RWBuffer<int> Out; + +[numthreads(1,1,1)] +void main(unsigned GI : SV_GroupIndex) { + // CHECK: define void @main() + + // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_0_0_1t(target("dx.TypedBuffer", i32, 0, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_1_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] + // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] + Out[GI] = In[GI]; + + // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]] + // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}}) + // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}}) + // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]] + Out[GI + 1] = Out[GI]; +} diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 9f8e833..5931b60 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -83,6 +83,8 @@ mark_as_advanced(COMPILER_RT_BUILD_ORC) option(COMPILER_RT_BUILD_GWP_ASAN "Build GWP-ASan, and link it into SCUDO" ON) mark_as_advanced(COMPILER_RT_BUILD_GWP_ASAN) option(COMPILER_RT_ENABLE_CET "Build Compiler RT with CET enabled" OFF) +option(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME "Build asan unit tests without depending upon a just-built asan runtime" OFF) +mark_as_advanced(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME) option(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH "Set custom sysroot for building SCUDO standalone" OFF) mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH) diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index 9cd9c97..6d88c96 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -170,11 +170,21 @@ function(add_asan_tests arch test_runtime) set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config) set(CONFIG_NAME_DYNAMIC ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) + # On some platforms, unit tests can be run against the runtime that shipped + # with the host compiler with COMPILER_RT_TEST_STANDALONE_BUILD_LIBS=OFF. + # COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME=ON removes the dependency + # on `asan`, allowing the tests to be run independently without + # a newly built asan runtime. + set(ASAN_UNIT_TEST_DEPS asan) + if(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME) + set(ASAN_UNIT_TEST_DEPS) + endif() + # Closure to keep the values. function(generate_asan_tests test_objects test_suite testname) generate_compiler_rt_tests(${test_objects} ${test_suite} ${testname} ${arch} COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_IGNORELIST_FILE} - DEPS asan + DEPS ${ASAN_UNIT_TEST_DEPS} KIND ${TEST_KIND} ${ARGN} ) @@ -215,7 +225,7 @@ function(add_asan_tests arch test_runtime) add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}" SUBDIR "${CONFIG_NAME_DYNAMIC}" OBJECTS ${ASAN_INST_TEST_OBJECTS} - DEPS asan ${ASAN_INST_TEST_OBJECTS} + DEPS ${ASAN_UNIT_TEST_DEPS} ${ASAN_INST_TEST_OBJECTS} LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS} ${DYNAMIC_LINK_FLAGS} ) endif() diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index 7fb58be..edb2699 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack); const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1; const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1; const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1; +const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1; #define GET_MALLOC_STACK_TRACE \ UNINITIALIZED BufferedStackTrace stack; \ diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp index 64df863..80608aa 100644 --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -217,25 +217,52 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment, } auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated)); meta->requested_size = size; + uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated); + void* padding_start = reinterpret_cast<char*>(allocated) + size; + uptr padding_size = actually_allocated_size - size; + + // - With calloc(7,1), we can set the ideal tagging: + // bytes 0-6: initialized, origin not set (and irrelevant) + // byte 7: uninitialized, origin TAG_ALLOC_PADDING + // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING + // - If we have malloc(7) and __msan_get_track_origins() > 1, the 4-byte + // origin granularity only allows the slightly suboptimal tagging: + // bytes 0-6: uninitialized, origin TAG_ALLOC + // byte 7: uninitialized, origin TAG_ALLOC (suboptimal) + // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING + // - If we have malloc(7) and __msan_get_track_origins() == 1, we use a + // single origin bean to reduce overhead: + // bytes 0-6: uninitialized, origin TAG_ALLOC + // byte 7: uninitialized, origin TAG_ALLOC (suboptimal) + // bytes 8-15: uninitialized, origin TAG_ALLOC (suboptimal) + if (__msan_get_track_origins() && flags()->poison_in_malloc && + (zero || (__msan_get_track_origins() > 1))) { + stack->tag = STACK_TRACE_TAG_ALLOC_PADDING; + Origin o2 = Origin::CreateHeapOrigin(stack); + __msan_set_origin(padding_start, padding_size, o2.raw_id()); + } + if (zero) { if (allocator.FromPrimary(allocated)) __msan_clear_and_unpoison(allocated, size); else __msan_unpoison(allocated, size); // Mem is already zeroed. + + if (flags()->poison_in_malloc) + __msan_poison(padding_start, padding_size); } else if (flags()->poison_in_malloc) { - __msan_poison(allocated, size); + __msan_poison(allocated, actually_allocated_size); + if (__msan_get_track_origins()) { stack->tag = StackTrace::TAG_ALLOC; Origin o = Origin::CreateHeapOrigin(stack); - __msan_set_origin(allocated, size, o.raw_id()); + __msan_set_origin( + allocated, + __msan_get_track_origins() == 1 ? actually_allocated_size : size, + o.raw_id()); } } - uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated); - // For compatibility, the allocator converted 0-sized allocations into 1 byte - if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc) - __msan_poison(allocated, 1); - UnpoisonParam(2); RunMallocHooks(allocated, size); return allocated; @@ -255,9 +282,10 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) { if (flags()->poison_in_free && allocator.FromPrimary(p)) { __msan_poison(p, size); if (__msan_get_track_origins()) { + uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(p); stack->tag = StackTrace::TAG_DEALLOC; Origin o = Origin::CreateHeapOrigin(stack); - __msan_set_origin(p, size, o.raw_id()); + __msan_set_origin(p, actually_allocated_size, o.raw_id()); } } if (MsanThread *t = GetCurrentThread()) { diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp index 99bf81f..cd0bf67 100644 --- a/compiler-rt/lib/msan/msan_report.cpp +++ b/compiler-rt/lib/msan/msan_report.cpp @@ -90,6 +90,10 @@ static void DescribeOrigin(u32 id) { Printf(" %sVirtual table ptr was destroyed%s\n", d.Origin(), d.Default()); break; + case STACK_TRACE_TAG_ALLOC_PADDING: + Printf(" %sUninitialized value is outside of heap allocation%s\n", + d.Origin(), d.Default()); + break; default: Printf(" %sUninitialized value was created%s\n", d.Origin(), d.Default()); diff --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp new file mode 100644 index 0000000..72acf31 --- /dev/null +++ b/compiler-rt/test/msan/allocator_padding.cpp @@ -0,0 +1,94 @@ +// *** malloc: all bytes are uninitialized +// * malloc byte 0 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 0 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// +// * malloc byte 6 +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 6 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// +// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes +// 7-15 are padding. +// +// * malloc byte 7 +// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING, +// ALLOC always takes precedence. +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// +// Bytes 8-15 are padding +// For track-origins=1, ALLOC is used instead of ALLOC_PADDING. +// +// * malloc byte 8 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// +// * malloc byte 15 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC +// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING + +// *** calloc +// Bytes 0-6 are fully initialized, so no MSan report should happen. +// +// * calloc byte 0 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1 +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1 +// +// * calloc byte 6 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1 +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1 +// +// * calloc byte 7 +// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING +// (since the origin does not need to track bytes 4-6). +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// +// * calloc byte 8 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// +// * calloc byte 15 +// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char **argv) { +#ifdef USE_CALLOC + char *p = (char *)calloc(7, 1); +#else + char *p = (char *)malloc(7); +#endif + + if (argc == 2) { + int index = atoi(argv[1]); + + printf("p[%d] = %d\n", index, p[index]); + // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value + // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]] + // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation + // ORIGIN-ALLOC-PADDING: Uninitialized value is outside of heap allocation + free(p); + } + + return 0; +} diff --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp index 1451e1e..f4cf1d8 100644 --- a/compiler-rt/test/msan/zero_alloc.cpp +++ b/compiler-rt/test/msan/zero_alloc.cpp @@ -1,4 +1,9 @@ -// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK +// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,DISCOUNT +// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGINS #include <stdio.h> #include <stdlib.h> @@ -10,6 +15,7 @@ int main(int argc, char **argv) { printf("Content of p1 is: %d\n", *p1); // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]] + // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation free(p1); } @@ -19,6 +25,7 @@ int main(int argc, char **argv) { printf("Content of p2 is: %d\n", *p2); // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]] + // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation free(p2); } @@ -28,6 +35,8 @@ int main(int argc, char **argv) { printf("Content of p2 is: %d\n", *p3); // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]] + // DISCOUNT: Uninitialized value was created by a heap allocation + // ORIGINS: Uninitialized value is outside of heap allocation free(p3); } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 0afb295..70bb43a2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> { llvm::LogicalResult matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { + + if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) { + auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); + replaceWithAddrOfOrASCast( + rewriter, addr->getLoc(), + global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter), + getProgramAddressSpace(rewriter), + global ? global.getSymName() + : addr.getSymbol().getRootReference().getValue(), + convertType(addr.getType()), addr); + return mlir::success(); + } + auto global = addr->getParentOfType<mlir::ModuleOp>() .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); replaceWithAddrOfOrASCast( @@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { if (global.getDataAttr() && *global.getDataAttr() == cuf::DataAttribute::Constant) - TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation"); + g.setAddrSpace( + static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant)); rewriter.eraseOp(global); return mlir::success(); diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 41383fb..9bf10b5 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -353,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) { // calculation op. mlir::Value baseRef = llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op) + .Case<fir::DeclareOp>([&](auto op) { + // If this declare binds a view with an underlying storage operand, + // treat that storage as the base reference. Otherwise, fall back + // to the declared memref. + if (auto storage = op.getStorage()) + return storage; + return mlir::Value(varPtr); + }) .Case<hlfir::DesignateOp>([&](auto op) { // Get the base object. return op.getMemref(); diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir index bbd3f9f..60cda9e 100644 --- a/flang/test/Fir/CUDA/cuda-code-gen.mlir +++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir @@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e // CHECK-LABEL: llvm.func @_QQxxx() // CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor + +// ----- + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 { + %0 = fir.zero_bits i32 + fir.has_value %0 : i32 + } + gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel { + %c-1 = arith.constant -1 : index + %c1_i32 = arith.constant 1 : i32 + %0 = arith.constant 1 : i32 + %1 = arith.addi %0, %c1_i32 : i32 + %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32> + %4 = fir.load %2 : !fir.ref<i32> + %5 = fir.convert %1 : (i32) -> i64 + %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32> + fir.store %4 to %6 : !fir.ref<i32> + gpu.return + } + } +} + +// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32 +// CHECK-LABEL: gpu.func @_QMkernelsPassign +// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4> +// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir new file mode 100644 index 0000000..fabfe4c --- /dev/null +++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir @@ -0,0 +1,24 @@ +// Use --mlir-disable-threading so that the diagnostic printing is serialized. +// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s + +module { + // Build a scalar view via fir.declare with a storage operand into an array of i8 + func.func @_QPdeclare_with_storage_is_nonscalar() { + %c0 = arith.constant 0 : index + %arr = fir.alloca !fir.array<4xi8> + %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8> + %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32> + %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"} + : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32> + // Force interface query through an acc op that prints type category + %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + acc.enter_data dataOperands(%cp : !fir.ref<f32>) + return + } + + // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + // CHECK: Pointer-like and Mappable: !fir.ref<f32> + // CHECK: Type category: array +} + + diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake index c6f00ed..23d9d49 100644 --- a/lldb/cmake/modules/LLDBFramework.cmake +++ b/lldb/cmake/modules/LLDBFramework.cmake @@ -68,8 +68,6 @@ if(NOT APPLE_EMBEDDED) ) endif() -find_program(unifdef_EXECUTABLE unifdef) - # Wrap output in a target, so lldb-framework can depend on it. add_custom_target(liblldb-resource-headers DEPENDS lldb-sbapi-dwarf-enums ${lldb_staged_headers}) set_target_properties(liblldb-resource-headers PROPERTIES FOLDER "LLDB/Resources") diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index fec9fde..1a7db8f 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -130,6 +130,8 @@ FLAGS_ENUM(LaunchFlags){ eLaunchFlagInheritTCCFromParent = (1u << 12), ///< Don't make the inferior responsible for its own TCC ///< permissions but instead inherit them from its parent. + eLaunchFlagMemoryTagging = + (1u << 13), ///< Launch process with memory tagging explicitly enabled. }; /// Thread Run Modes. diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index 53e991a..1a2860a 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -1,3 +1,4 @@ +from abc import ABC, abstractmethod import ctypes import errno import io @@ -5,6 +6,7 @@ import threading import socket import traceback from lldbsuite.support import seven +from typing import Optional, List, Tuple def checksum(message): @@ -86,7 +88,7 @@ class MockGDBServerResponder: handles any packet not recognized in the common packet handling code. """ - registerCount = 40 + registerCount: int = 40 class RESPONSE_DISCONNECT: pass @@ -95,7 +97,7 @@ class MockGDBServerResponder: pass def __init__(self): - self.packetLog = [] + self.packetLog: List[str] = [] def respond(self, packet): """ @@ -241,7 +243,7 @@ class MockGDBServerResponder: def qHostInfo(self): return "ptrsize:8;endian:little;" - def qEcho(self): + def qEcho(self, num: int): return "E04" def qQueryGDBServer(self): @@ -262,10 +264,10 @@ class MockGDBServerResponder: def D(self, packet): return "OK" - def readRegisters(self): + def readRegisters(self) -> str: return "00000000" * self.registerCount - def readRegister(self, register): + def readRegister(self, register: int) -> str: return "00000000" def writeRegisters(self, registers_hex): @@ -305,7 +307,9 @@ class MockGDBServerResponder: # SIGINT is 2, return type is 2 digit hex string return "S02" - def qXferRead(self, obj, annex, offset, length): + def qXferRead( + self, obj: str, annex: str, offset: int, length: int + ) -> Tuple[Optional[str], bool]: return None, False def _qXferResponse(self, data, has_more): @@ -373,15 +377,17 @@ class MockGDBServerResponder: pass -class ServerChannel: +class ServerChannel(ABC): """ A wrapper class for TCP or pty-based server. """ - def get_connect_address(self): + @abstractmethod + def get_connect_address(self) -> str: """Get address for the client to connect to.""" - def get_connect_url(self): + @abstractmethod + def get_connect_url(self) -> str: """Get URL suitable for process connect command.""" def close_server(self): @@ -393,10 +399,12 @@ class ServerChannel: def close_connection(self): """Close all resources used by the accepted connection.""" - def recv(self): + @abstractmethod + def recv(self) -> bytes: """Receive a data packet from the connected client.""" - def sendall(self, data): + @abstractmethod + def sendall(self, data: bytes) -> None: """Send the data to the connected client.""" @@ -427,11 +435,11 @@ class ServerSocket(ServerChannel): self._connection.close() self._connection = None - def recv(self): + def recv(self) -> bytes: assert self._connection is not None return self._connection.recv(4096) - def sendall(self, data): + def sendall(self, data: bytes) -> None: assert self._connection is not None return self._connection.sendall(data) @@ -443,10 +451,10 @@ class TCPServerSocket(ServerSocket): )[0] super().__init__(family, type, proto, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return "[{}]:{}".format(*self._server_socket.getsockname()) - def get_connect_url(self): + def get_connect_url(self) -> str: return "connect://" + self.get_connect_address() @@ -454,10 +462,10 @@ class UnixServerSocket(ServerSocket): def __init__(self, addr): super().__init__(socket.AF_UNIX, socket.SOCK_STREAM, 0, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return self._server_socket.getsockname() - def get_connect_url(self): + def get_connect_url(self) -> str: return "unix-connect://" + self.get_connect_address() @@ -471,7 +479,7 @@ class PtyServerSocket(ServerChannel): self._primary = io.FileIO(primary, "r+b") self._secondary = io.FileIO(secondary, "r+b") - def get_connect_address(self): + def get_connect_address(self) -> str: libc = ctypes.CDLL(None) libc.ptsname.argtypes = (ctypes.c_int,) libc.ptsname.restype = ctypes.c_char_p @@ -484,7 +492,7 @@ class PtyServerSocket(ServerChannel): self._secondary.close() self._primary.close() - def recv(self): + def recv(self) -> bytes: try: return self._primary.read(4096) except OSError as e: @@ -493,8 +501,8 @@ class PtyServerSocket(ServerChannel): return b"" raise - def sendall(self, data): - return self._primary.write(data) + def sendall(self, data: bytes) -> None: + self._primary.write(data) class MockGDBServer: @@ -527,18 +535,21 @@ class MockGDBServer: self._thread.join() self._thread = None - def get_connect_address(self): + def get_connect_address(self) -> str: + assert self._socket is not None return self._socket.get_connect_address() - def get_connect_url(self): + def get_connect_url(self) -> str: + assert self._socket is not None return self._socket.get_connect_url() def run(self): + assert self._socket is not None # For testing purposes, we only need to worry about one client # connecting just one time. try: self._socket.accept() - except: + except Exception: traceback.print_exc() return self._shouldSendAck = True @@ -553,7 +564,7 @@ class MockGDBServer: self._receive(data) except self.TerminateConnectionException: pass - except Exception as e: + except Exception: print( "An exception happened when receiving the response from the gdb server. Closing the client..." ) @@ -586,7 +597,9 @@ class MockGDBServer: Once a complete packet is found at the front of self._receivedData, its data is removed form self._receivedData. """ + assert self._receivedData is not None data = self._receivedData + assert self._receivedDataOffset is not None i = self._receivedDataOffset data_len = len(data) if data_len == 0: @@ -639,10 +652,13 @@ class MockGDBServer: self._receivedDataOffset = 0 return packet - def _sendPacket(self, packet): - self._socket.sendall(seven.bitcast_to_bytes(frame_packet(packet))) + def _sendPacket(self, packet: str): + assert self._socket is not None + framed_packet = seven.bitcast_to_bytes(frame_packet(packet)) + self._socket.sendall(framed_packet) def _handlePacket(self, packet): + assert self._socket is not None if packet is self.PACKET_ACK: # Ignore ACKs from the client. For the future, we can consider # adding validation code to make sure the client only sends ACKs diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 21d94d6..8ae20bd 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -127,6 +127,10 @@ Status CommandOptionsProcessLaunch::SetOptionValue( break; } + case 'M': + launch_info.GetFlags().Set(eLaunchFlagMemoryTagging); + break; + case 'c': if (!option_arg.empty()) launch_info.SetShell(FileSpec(option_arg)); diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 595b3d0..a9f054e 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1173,6 +1173,11 @@ let Command = "process launch" in { Arg<"Boolean">, Desc<"Set whether to shell expand arguments to the process when " "launching.">; + def process_launch_memory_tagging + : Option<"memory-tagging", "M">, + Desc<"Set whether to explicitly enable memory tagging when launching " + "the process. Requires hardware support. " + "(Only supported on Darwin.)">; } let Command = "process attach" in { diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index 3c1d117..7120892 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -1210,6 +1210,39 @@ static Status LaunchProcessPosixSpawn(const char *exe_path, } } + if (launch_info.GetFlags().Test(eLaunchFlagMemoryTagging)) { + // The following function configures the spawn attributes to launch the + // process with memory tagging explicitly enabled. We look it up + // dynamically since it is only available on newer OS. Does nothing on + // hardware which does not support MTE. + // + // int posix_spawnattr_set_use_sec_transition_shims_np( + // posix_spawnattr_t *attr, uint32_t flags); + // + using posix_spawnattr_set_use_sec_transition_shims_np_t = + int (*)(posix_spawnattr_t *attr, uint32_t flags); + auto posix_spawnattr_set_use_sec_transition_shims_np_fn = + (posix_spawnattr_set_use_sec_transition_shims_np_t)dlsym( + RTLD_DEFAULT, "posix_spawnattr_set_use_sec_transition_shims_np"); + if (posix_spawnattr_set_use_sec_transition_shims_np_fn) { + error = + Status(posix_spawnattr_set_use_sec_transition_shims_np_fn(&attr, 0), + eErrorTypePOSIX); + if (error.Fail()) { + LLDB_LOG(log, + "error: {0}, " + "posix_spawnattr_set_use_sec_transition_shims_np(&attr, 0)", + error); + return error; + } + } else { + LLDB_LOG(log, + "error: posix_spawnattr_set_use_sec_transition_shims_np not " + "available", + error); + } + } + // Don't set the binpref if a shell was provided. After all, that's only // going to affect what version of the shell is launched, not what fork of // the binary is launched. We insert "arch --arch <ARCH> as part of the diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp index c0b1079..9b7df10 100644 --- a/lldb/source/Host/windows/MainLoopWindows.cpp +++ b/lldb/source/Host/windows/MainLoopWindows.cpp @@ -55,11 +55,7 @@ public: if (m_monitor_thread.joinable()) { m_stopped = true; SetEvent(m_ready); - // Keep trying to cancel ReadFile() until the thread exits. - do { - CancelIoEx(m_handle, /*lpOverlapped=*/NULL); - } while (WaitForSingleObject(m_monitor_thread.native_handle(), 1) == - WAIT_TIMEOUT); + CancelIoEx(m_handle, /*lpOverlapped=*/NULL); m_monitor_thread.join(); } CloseHandle(m_event); diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp index cc0c9e7..6d8f41a 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp @@ -14,6 +14,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/lldb-enumerations.h" +#include "llvm/ADT/Sequence.h" using namespace lldb; using namespace lldb_private; @@ -266,22 +267,47 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process, return true; } -bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, - lldb::addr_t relative_selector_base_addr, - bool is_small, bool has_direct_sel) { - size_t ptr_size = process->GetAddressByteSize(); - size_t size = GetSize(process, is_small); +llvm::SmallVector<ClassDescriptorV2::method_t, 0> +ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses, + lldb::addr_t relative_selector_base_addr, + bool is_small, bool has_direct_sel) const { + lldb_private::Process *process = m_runtime.GetProcess(); + if (!process) + return {}; - DataBufferHeap buffer(size, '\0'); - Status error; + const size_t size = method_t::GetSize(process, is_small); + const size_t num_methods = addresses.size(); - process->ReadMemory(addr, buffer.GetBytes(), size, error); - if (error.Fail()) { - return false; + llvm::SmallVector<uint8_t, 0> buffer(num_methods * size, 0); + llvm::DenseSet<uint32_t> failed_indices; + + for (auto [idx, addr] : llvm::enumerate(addresses)) { + Status error; + process->ReadMemory(addr, buffer.data() + idx * size, size, error); + if (error.Fail()) + failed_indices.insert(idx); } - DataExtractor extractor(buffer.GetBytes(), size, process->GetByteOrder(), - ptr_size); + llvm::SmallVector<method_t, 0> methods; + methods.reserve(num_methods); + for (auto [idx, addr] : llvm::enumerate(addresses)) { + if (failed_indices.contains(idx)) + continue; + DataExtractor extractor(buffer.data() + idx * size, size, + process->GetByteOrder(), + process->GetAddressByteSize()); + methods.push_back(method_t()); + methods.back().Read(extractor, process, addr, relative_selector_base_addr, + is_small, has_direct_sel); + } + + return methods; +} + +bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor, + Process *process, lldb::addr_t addr, + lldb::addr_t relative_selector_base_addr, + bool is_small, bool has_direct_sel) { lldb::offset_t cursor = 0; if (is_small) { @@ -291,11 +317,11 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, m_name_ptr = addr + nameref_offset; + Status error; if (!has_direct_sel) { // The SEL offset points to a SELRef. We need to dereference twice. - m_name_ptr = process->ReadUnsignedIntegerFromMemory(m_name_ptr, ptr_size, - 0, error); - if (!error.Success()) + m_name_ptr = process->ReadPointerFromMemory(m_name_ptr, error); + if (error.Fail()) return false; } else if (relative_selector_base_addr != LLDB_INVALID_ADDRESS) { m_name_ptr = relative_selector_base_addr + nameref_offset; @@ -308,13 +334,13 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr, m_imp_ptr = extractor.GetAddress_unchecked(&cursor); } + Status error; process->ReadCStringFromMemory(m_name_ptr, m_name, error); - if (error.Fail()) { + if (error.Fail()) return false; - } process->ReadCStringFromMemory(m_types_ptr, m_types, error); - return !error.Fail(); + return error.Success(); } bool ClassDescriptorV2::ivar_list_t::Read(Process *process, lldb::addr_t addr) { @@ -447,17 +473,19 @@ ClassDescriptorV2::GetMethodList(Process *process, bool ClassDescriptorV2::ProcessMethodList( std::function<bool(const char *, const char *)> const &instance_method_func, ClassDescriptorV2::method_list_t &method_list) const { - lldb_private::Process *process = m_runtime.GetProcess(); - auto method = std::make_unique<method_t>(); - lldb::addr_t relative_selector_base_addr = - m_runtime.GetRelativeSelectorBaseAddr(); - for (uint32_t i = 0, e = method_list.m_count; i < e; ++i) { - method->Read(process, method_list.m_first_ptr + (i * method_list.m_entsize), - relative_selector_base_addr, method_list.m_is_small, - method_list.m_has_direct_selector); - if (instance_method_func(method->m_name.c_str(), method->m_types.c_str())) + auto idx_to_method_addr = [&](uint32_t idx) { + return method_list.m_first_ptr + (idx * method_list.m_entsize); + }; + llvm::SmallVector<addr_t> addresses = llvm::to_vector(llvm::map_range( + llvm::seq<uint32_t>(method_list.m_count), idx_to_method_addr)); + + llvm::SmallVector<method_t, 0> methods = + ReadMethods(addresses, m_runtime.GetRelativeSelectorBaseAddr(), + method_list.m_is_small, method_list.m_has_direct_selector); + + for (const auto &method : methods) + if (instance_method_func(method.m_name.c_str(), method.m_types.c_str())) break; - } return true; } diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h index 920a5eb..78b3311 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h @@ -172,11 +172,16 @@ private: + field_size; // IMP imp; } - bool Read(Process *process, lldb::addr_t addr, + bool Read(DataExtractor &extractor, Process *process, lldb::addr_t addr, lldb::addr_t relative_selector_base_addr, bool is_small, bool has_direct_sel); }; + llvm::SmallVector<method_t, 0> + ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses, + lldb::addr_t relative_selector_base_addr, bool is_small, + bool has_direct_sel) const; + struct ivar_list_t { uint32_t m_entsize; uint32_t m_count; diff --git a/lldb/test/API/macosx/mte/Makefile b/lldb/test/API/macosx/mte/Makefile index cb20942..d614e0f 100644 --- a/lldb/test/API/macosx/mte/Makefile +++ b/lldb/test/API/macosx/mte/Makefile @@ -1,12 +1,15 @@ C_SOURCES := main.c -EXE := uaf_mte +EXE := uaf -all: uaf_mte sign +binary-plain: uaf +binary-entitled: uaf sign + +all: binary-entitled include Makefile.rules -sign: mte-entitlements.plist uaf_mte +sign: mte-entitlements.plist uaf ifeq ($(OS),Darwin) codesign -s - -f --entitlements $^ endif diff --git a/lldb/test/API/macosx/mte/TestDarwinMTE.py b/lldb/test/API/macosx/mte/TestDarwinMTE.py index 489e24a..a70b4b4 100644 --- a/lldb/test/API/macosx/mte/TestDarwinMTE.py +++ b/lldb/test/API/macosx/mte/TestDarwinMTE.py @@ -7,13 +7,25 @@ from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil import lldbsuite.test.cpu_feature as cpu_feature -exe_name = "uaf_mte" # Must match Makefile +exe_name = "uaf" # Must match Makefile class TestDarwinMTE(TestBase): NO_DEBUG_INFO_TESTCASE = True @skipUnlessFeature(cpu_feature.AArch64.MTE) + def test_process_launch_memory_tagging(self): + self.build(make_targets=["binary-plain"]) + self.createTestTarget(self.getBuildArtifact(exe_name)) + + self.expect("process launch", substrs=["exited with status = 0"]) + + self.expect( + "process launch --memory-tagging", + substrs=["stopped", "stop reason = EXC_ARM_MTE_TAG_FAULT"], + ) + + @skipUnlessFeature(cpu_feature.AArch64.MTE) def test_tag_fault(self): self.build() exe = self.getBuildArtifact(exe_name) diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index f06e7ce..bb7f3be 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -32,8 +32,11 @@ bool IsAnnotationOK(const GlobalVariable &GV); /// profile information and provides methods to operate on them. class StaticDataProfileInfo { public: - /// Accummulate the profile count of a constant that will be lowered to static - /// data sections. + /// A constant is tracked only if the following conditions are met. + /// 1) It has local (i.e., private or internal) linkage. + // 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}. + // 3) It's eligible for section prefix annotation. See `AnnotationKind` + // above for ineligible reasons. DenseMap<const Constant *, uint64_t> ConstantProfileCounts; /// Keeps track of the constants that are seen at least once without profile @@ -44,6 +47,22 @@ public: LLVM_ABI std::optional<uint64_t> getConstantProfileCount(const Constant *C) const; + /// Use signed enums for enum value comparison, and make 'LukewarmOrUnknown' + /// as 0 so any accidentally uninitialized value will default to unknown. + enum class StaticDataHotness : int8_t { + Cold = -1, + LukewarmOrUnknown = 0, + Hot = 1, + }; + + /// Return the hotness of the constant \p C based on its profile count \p + /// Count. + LLVM_ABI StaticDataHotness getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const; + + /// Return the string representation of the hotness enum \p Hotness. + LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const; + public: StaticDataProfileInfo() = default; diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h index 77ce052..2c59a52 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -43,8 +43,14 @@ public: /// /// \param LDCS Flag to indicate whether we should load the call site /// information from DWARF `DW_TAG_call_site` entries - DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false) - : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {} + /// + /// \param MachO Flag to indicate if the object file is mach-o (Apple's + /// executable format). Apple has some compile unit attributes that look like + /// split DWARF, but they aren't and they can cause warnins to be emitted + /// about missing DWO files. + DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false, + bool MachO = false) + : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS), IsMachO(MachO) {} /// Extract the DWARF from the supplied object file and convert it into the /// Gsym format in the GsymCreator object that is passed in. Returns an @@ -97,6 +103,7 @@ private: DWARFContext &DICtx; GsymCreator &Gsym; bool LoadDwarfCallSites; + bool IsMachO; friend class DwarfTransformerTest; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h index 596cc18..b0197f0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h @@ -13,7 +13,6 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H #define LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H -#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/Support/Compiler.h" @@ -54,9 +53,6 @@ inline size_t numDeallocActions(const AllocActions &AAs) { AAs, [](const AllocActionCallPair &P) { return !!P.Dealloc; }); } -using OnRunFinalizeActionsCompleteFn = - unique_function<void(Expected<std::vector<WrapperFunctionCall>>)>; - /// Run finalize actions. /// /// If any finalize action fails then the corresponding dealloc actions will be @@ -67,16 +63,13 @@ using OnRunFinalizeActionsCompleteFn = /// be returned. The dealloc actions should be run by calling /// runDeallocationActions. If this function succeeds then the AA argument will /// be cleared before the function returns. -LLVM_ABI void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete); - -using OnRunDeallocActionsComeleteFn = unique_function<void(Error)>; +LLVM_ABI Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs); /// Run deallocation actions. /// Dealloc actions will be run in reverse order (from last element of DAs to /// first). -LLVM_ABI void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs, - OnRunDeallocActionsComeleteFn OnComplete); +LLVM_ABI Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs); using SPSAllocActionCallPair = SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>; diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h index 5b823db..ce91f62 100644 --- a/llvm/include/llvm/TableGen/CodeGenHelpers.h +++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h @@ -34,6 +34,21 @@ private: raw_ostream &OS; }; +// Simple RAII helper for emitting header include guard (ifndef-define-endif). +class IncludeGuardEmitter { +public: + IncludeGuardEmitter(raw_ostream &OS, StringRef Name) + : Name(Name.str()), OS(OS) { + OS << "#ifndef " << Name << "\n" + << "#define " << Name << "\n\n"; + } + ~IncludeGuardEmitter() { OS << "\n#endif // " << Name << "\n"; } + +private: + std::string Name; + raw_ostream &OS; +}; + // Simple RAII helper for emitting namespace scope. Name can be a single // namespace (empty for anonymous namespace) or nested namespace. class NamespaceEmitter { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 4bafd3f..a64b93d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6417,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S, case scSequentialUMinExpr: return GetGCDMultiple(cast<SCEVNAryExpr>(S)); case scUnknown: { - // ask ValueTracking for known bits + // Ask ValueTracking for known bits. SCEVUnknown only become available at + // the point their underlying IR instruction has been defined. If CtxI was + // not provided, use: + // * the first instruction in the entry block if it is an argument + // * the instruction itself otherwise. const SCEVUnknown *U = cast<SCEVUnknown>(S); + if (!CtxI) { + if (isa<Argument>(U->getValue())) + CtxI = &*F.getEntryBlock().begin(); + else if (auto *I = dyn_cast<Instruction>(U->getValue())) + CtxI = I; + } unsigned Known = computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT) .countMinTrailingZeros(); diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index 1f751ee..e7f0b2c 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount( OriginalCount = getInstrMaxCountValue(); } +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { + // The accummulated counter shows the constant is hot. Return enum 'hot' + // whether this variable is seen by unprofiled functions or not. + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; + // The constant is not hot, and seen by unprofiled functions. We don't want to + // assign it to unlikely sections, even if the counter says 'cold'. So return + // enum 'LukewarmOrUnknown'. + if (ConstantWithoutCounts.count(C)) + return StaticDataHotness::LukewarmOrUnknown; + // The accummulated counter shows the constant is cold so return enum 'cold'. + if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; + + return StaticDataHotness::LukewarmOrUnknown; +} + +StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataHotness::Cold: + return "unlikely"; + case StaticDataHotness::Hot: + return "hot"; + default: + return ""; + } +} + std::optional<uint64_t> StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { auto I = ConstantProfileCounts.find(C); @@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { StringRef StaticDataProfileInfo::getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); + std::optional<uint64_t> Count = getConstantProfileCount(C); if (!Count) return ""; - // The accummulated counter shows the constant is hot. Return 'hot' whether - // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; - // The constant is not hot, and seen by unprofiled functions. We don't want to - // assign it to unlikely sections, even if the counter says 'cold'. So return - // an empty prefix before checking whether the counter is cold. - if (ConstantWithoutCounts.count(C)) - return ""; - // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) - return "unlikely"; - // The counter says lukewarm. Return an empty prefix. - return ""; + return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 0e38017..d2f2c3e 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR, void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { LiveQueryResult LRQ = LR.Query(Kill); - VNInfo *VNI = LRQ.valueOutOrDead(); + // LR may have liveness reachable from early clobber slot, which may be + // only live-in instead of live-out of the instruction. + // For example, LR =[1r, 3r), Kill = 3e, we have to prune [3e, 3r) of LR. + VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn(); if (!VNI) return; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 787a81a..358e060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -658,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -18590,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18635,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. @@ -18645,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18742,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -29743,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29775,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29789,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29799,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29814,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29843,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. @@ -29862,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, true); +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, false); +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, false); } /// Return true if there is any possibility that the two addresses overlap. diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 7a0256f..fa39603 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, if (FilePath.empty()) { // If we had a DW_AT_decl_file, but got no file then we need to emit a // warning. + const uint64_t DwarfFileIdx = dwarf::toUnsigned( + Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); + // Check if there is no DW_AT_decl_line attribute, and don't report an + // error if it isn't there. + if (DwarfFileIdx == UINT32_MAX) + return; Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) { - const uint64_t DwarfFileIdx = dwarf::toUnsigned( - Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has an invalid file index " << DwarfFileIdx << " in its DW_AT_decl_file attribute, unable to create a single " @@ -629,6 +633,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { size_t NumBefore = Gsym.getNumFunctionInfos(); auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); + // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF. + if (IsMachO) + return ReturnDie; + if (DwarfUnit.getDWOId()) { DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); if (!DWOCU->isDWOUnit()) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 5b3c05e..6c7e27e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -260,22 +260,17 @@ public: } // Run finalization actions. - using WrapperFunctionCall = orc::shared::WrapperFunctionCall; - runFinalizeActions( - G->allocActions(), - [this, OnFinalized = std::move(OnFinalized)]( - Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable { - completeFinalization(std::move(OnFinalized), - std::move(DeallocActions)); - }); - } + auto DeallocActions = runFinalizeActions(G->allocActions()); + if (!DeallocActions) { + OnFinalized(DeallocActions.takeError()); + return; + } - void abandon(OnAbandonedFunction OnAbandoned) override { - Error Err = Error::success(); - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); - if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); + // Release the finalize segments slab. + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { + OnFinalized(errorCodeToError(EC)); + return; + } #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -284,22 +279,17 @@ public: G = nullptr; #endif - OnAbandoned(std::move(Err)); + // Continue with finalized allocation. + OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), + std::move(*DeallocActions))); } -private: - void completeFinalization( - OnFinalizedFunction OnFinalized, - Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) { - - if (!DeallocActions) - return OnFinalized(DeallocActions.takeError()); - - // Release the finalize segments slab. - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { - OnFinalized(errorCodeToError(EC)); - return; - } + void abandon(OnAbandonedFunction OnAbandoned) override { + Error Err = Error::success(); + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); + if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -308,11 +298,10 @@ private: G = nullptr; #endif - // Continue with finalized allocation. - OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), - std::move(*DeallocActions))); + OnAbandoned(std::move(Err)); } +private: Error applyProtections() { for (auto &KV : BL.segments()) { const auto &AG = KV.first; diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 7b327af..7e606c6a 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size); } - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return OnInitialized(DeinitializeActionsOrErr.takeError()); - } + auto DeinitializeActions = shared::runFinalizeActions(AI.Actions); + if (!DeinitializeActions) + return OnInitialized(DeinitializeActions.takeError()); { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, // This is the maximum range whose permission have been possibly modified auto &Alloc = Allocations[MinAddr]; Alloc.Size = MaxAddr - MinAddr; - Alloc.DeinitializationActions = std::move(DeinitializeActions); + Alloc.DeinitializationActions = std::move(*DeinitializeActions); Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize( for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Reset protections to read/write so the area can be reused if (auto EC = sys::Memory::protectMappedMemory( diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp index 08ab0c6..91f2899 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp @@ -12,39 +12,31 @@ namespace llvm { namespace orc { namespace shared { -void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete) { +Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs) { std::vector<WrapperFunctionCall> DeallocActions; DeallocActions.reserve(numDeallocActions(AAs)); for (auto &AA : AAs) { if (AA.Finalize) - - if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) { - while (!DeallocActions.empty()) { - Err = joinErrors(std::move(Err), - DeallocActions.back().runWithSPSRetErrorMerged()); - DeallocActions.pop_back(); - } - return OnComplete(std::move(Err)); - } + if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) + return joinErrors(std::move(Err), runDeallocActions(DeallocActions)); if (AA.Dealloc) DeallocActions.push_back(std::move(AA.Dealloc)); } AAs.clear(); - OnComplete(std::move(DeallocActions)); + return DeallocActions; } -void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs, - OnRunDeallocActionsComeleteFn OnComplete) { +Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) { Error Err = Error::success(); while (!DAs.empty()) { Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged()); DAs = DAs.drop_back(); } - OnComplete(std::move(Err)); + return Err; } } // namespace shared diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 8c24b1f..4fbf232 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -9,10 +9,8 @@ #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h" #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Process.h" #include "llvm/Support/WindowsError.h" -#include <future> #include <sstream> #if defined(LLVM_ON_UNIX) @@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( } // Run finalization actions and get deinitlization action list. - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return DeinitializeActionsOrErr.takeError(); + auto DeinitializeActions = shared::runFinalizeActions(FR.Actions); + if (!DeinitializeActions) { + return DeinitializeActions.takeError(); } { std::lock_guard<std::mutex> Lock(Mutex); Allocations[MinAddr].DeinitializationActions = - std::move(DeinitializeActions); + std::move(*DeinitializeActions); Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize( std::lock_guard<std::mutex> Lock(Mutex); for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - if (Err) - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Remove the allocation from the allocation list of its reservation for (auto &Reservation : Reservations) { diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index ca81d30..8ace2d2 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/MD5.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <cstdint> #include <optional> using namespace llvm; @@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) { dxbc::PSV::v2::ResourceBindInfo BindInfo; BindInfo.Type = Type; BindInfo.LowerBound = Binding.LowerBound; - BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1; + assert(Binding.Size == UINT32_MAX || + (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && + "Resource range is too large"); + BindInfo.UpperBound = (Binding.Size == UINT32_MAX) + ? UINT32_MAX + : Binding.LowerBound + Binding.Size - 1; BindInfo.Space = Binding.Space; BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind); BindInfo.Flags = Flags; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 47c24fc..f973949 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineOperand Src = MI.getOperand(3); MachineOperand Len = MI.getOperand(4); + // If the length is a constant, we don't actually need the check. + if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) { + if (Def->getOpcode() == WebAssembly::CONST_I32 || + Def->getOpcode() == WebAssembly::CONST_I64) { + if (Def->getOperand(1).getImm() == 0) { + // A zero-length memcpy is a no-op. + MI.eraseFromParent(); + return BB; + } + // A non-zero-length memcpy doesn't need a zero check. + unsigned MemoryCopy = + Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32; + BuildMI(*BB, MI, DL, TII.get(MemoryCopy)) + .add(DstMem) + .add(SrcMem) + .add(Dst) + .add(Src) + .add(Len); + MI.eraseFromParent(); + return BB; + } + } + // We're going to add an extra use to `Len` to test if it's zero; that // use shouldn't be a kill, even if the original use is. MachineOperand NoKillLen = Len; @@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL, MachineOperand Val = MI.getOperand(2); MachineOperand Len = MI.getOperand(3); + // If the length is a constant, we don't actually need the check. + if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) { + if (Def->getOpcode() == WebAssembly::CONST_I32 || + Def->getOpcode() == WebAssembly::CONST_I64) { + if (Def->getOperand(1).getImm() == 0) { + // A zero-length memset is a no-op. + MI.eraseFromParent(); + return BB; + } + // A non-zero-length memset doesn't need a zero check. + unsigned MemoryFill = + Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32; + BuildMI(*BB, MI, DL, TII.get(MemoryFill)) + .add(Mem) + .add(Dst) + .add(Val) + .add(Len); + MI.eraseFromParent(); + return BB; + } + } + // We're going to add an extra use to `Len` to test if it's zero; that // use shouldn't be a kill, even if the original use is. MachineOperand NoKillLen = Len; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index d2f09e9..578fec7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2667,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); - V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); + // No profiling support for vector selects. + V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old, + DEBUG_TYPE, Name + "blend"); LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); return V; diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll new file mode 100644 index 0000000..ebab9f0 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s --data-layout="e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X64 %s +; RUN: opt < %s --data-layout="e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X32 %s + +declare void @useptr(ptr) + +define void @ptrtoaddr(ptr %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) { +; X64-LABEL: 'ptrtoaddr' +; X64-NEXT: Classifying expressions for: @ptrtoaddr +; X64-NEXT: %p0 = ptrtoaddr ptr %in to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: %p1 = ptrtoaddr ptr %in to i32 +; X64-NEXT: --> %p1 U: full-set S: full-set +; X64-NEXT: %p2 = ptrtoaddr ptr %in to i16 +; X64-NEXT: --> %p2 U: full-set S: full-set +; X64-NEXT: %p3 = ptrtoaddr ptr %in to i128 +; X64-NEXT: --> %p3 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr +; +; X32-LABEL: 'ptrtoaddr' +; X32-NEXT: Classifying expressions for: @ptrtoaddr +; X32-NEXT: %p0 = ptrtoaddr ptr %in to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: %p1 = ptrtoaddr ptr %in to i32 +; X32-NEXT: --> %p1 U: full-set S: full-set +; X32-NEXT: %p2 = ptrtoaddr ptr %in to i16 +; X32-NEXT: --> %p2 U: full-set S: full-set +; X32-NEXT: %p3 = ptrtoaddr ptr %in to i128 +; X32-NEXT: --> %p3 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr +; + %p0 = ptrtoaddr ptr %in to i64 + %p1 = ptrtoaddr ptr %in to i32 + %p2 = ptrtoaddr ptr %in to i16 + %p3 = ptrtoaddr ptr %in to i128 + store i64 %p0, ptr %out0 + store i32 %p1, ptr %out1 + store i16 %p2, ptr %out2 + store i128 %p3, ptr %out3 + ret void +} + +define void @ptrtoaddr_as1(ptr addrspace(1) %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) { +; X64-LABEL: 'ptrtoaddr_as1' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_as1 +; X64-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32 +; X64-NEXT: --> %p1 U: full-set S: full-set +; X64-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16 +; X64-NEXT: --> %p2 U: full-set S: full-set +; X64-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128 +; X64-NEXT: --> %p3 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_as1 +; +; X32-LABEL: 'ptrtoaddr_as1' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_as1 +; X32-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32 +; X32-NEXT: --> %p1 U: full-set S: full-set +; X32-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16 +; X32-NEXT: --> %p2 U: full-set S: full-set +; X32-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128 +; X32-NEXT: --> %p3 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_as1 +; + %p0 = ptrtoaddr ptr addrspace(1) %in to i64 + %p1 = ptrtoaddr ptr addrspace(1) %in to i32 + %p2 = ptrtoaddr ptr addrspace(1) %in to i16 + %p3 = ptrtoaddr ptr addrspace(1) %in to i128 + store i64 %p0, ptr %out0 + store i32 %p1, ptr %out1 + store i16 %p2, ptr %out2 + store i128 %p3, ptr %out3 + ret void +} + +define void @ptrtoaddr_of_bitcast(ptr %in, ptr %out0) { +; X64-LABEL: 'ptrtoaddr_of_bitcast' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast +; X64-NEXT: %in_casted = bitcast ptr %in to ptr +; X64-NEXT: --> %in U: full-set S: full-set +; X64-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast +; +; X32-LABEL: 'ptrtoaddr_of_bitcast' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast +; X32-NEXT: %in_casted = bitcast ptr %in to ptr +; X32-NEXT: --> %in U: full-set S: full-set +; X32-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast +; + %in_casted = bitcast ptr %in to ptr + %p0 = ptrtoaddr ptr %in_casted to i64 + store i64 %p0, ptr %out0 + ret void +} + +define void @ptrtoaddr_of_nullptr(ptr %out0) { +; ALL-LABEL: 'ptrtoaddr_of_nullptr' +; ALL-NEXT: Classifying expressions for: @ptrtoaddr_of_nullptr +; ALL-NEXT: %p0 = ptrtoaddr ptr null to i64 +; ALL-NEXT: --> %p0 U: full-set S: full-set +; ALL-NEXT: Determining loop execution counts for: @ptrtoaddr_of_nullptr +; + %p0 = ptrtoaddr ptr null to i64 + store i64 %p0, ptr %out0 + ret void +} + +define void @ptrtoaddr_of_gep(ptr %in, ptr %out0) { +; X64-LABEL: 'ptrtoaddr_of_gep' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_gep +; X64-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42 +; X64-NEXT: --> (42 + %in) U: full-set S: full-set +; X64-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep +; +; X32-LABEL: 'ptrtoaddr_of_gep' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_gep +; X32-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42 +; X32-NEXT: --> (42 + %in) U: full-set S: full-set +; X32-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep +; + %in_adj = getelementptr inbounds i8, ptr %in, i64 42 + %p0 = ptrtoaddr ptr %in_adj to i64 + store i64 %p0, ptr %out0 + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index 7ba422d..a477465c 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -578,22 +578,22 @@ define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] @@ -615,9 +615,9 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 @@ -644,9 +644,9 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start)<nuw><nsw>,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 @@ -677,22 +677,22 @@ define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr ; CHECK-NEXT: %c = call i1 @cond() ; CHECK-NEXT: --> %c U: full-set S: full-set ; CHECK-NEXT: %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll index bea0310..70224fc 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll @@ -94,6 +94,18 @@ define void @main() #0 { %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0( i32 4, i32 0, i32 10, i32 5, ptr null) + + ; RWBuffer<float4> UnboundedArray[] : register(u10, space5) +; CHECK: - Type: UAVTyped +; CHECK: Space: 5 +; CHECK: LowerBound: 10 +; CHECK: UpperBound: 4294967295 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + ; RWBuffer<float4> Buf = BufferArray[100]; + %uav3 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 5, i32 10, i32 -1, i32 100, ptr null) ret void } diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll index bf31ccb..559cc53 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -32,6 +32,40 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v32i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v32i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i16: ; CHECK: # %bb.0: @@ -49,6 +83,40 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + +define void @select_v16i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i32: ; CHECK: # %bb.0: @@ -65,19 +133,70 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i32> %v0, <8 x i32> %v1 + store <8 x i32> %sel, ptr %res + ret void +} + +define void @select_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %v0, <8 x float> %v1 + store <8 x float> %sel, ptr %res + ret void +} + define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI10_0) ; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x i64>, ptr %a0 %v1 = load <4 x i64>, ptr %a1 - %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1 + %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v0, <4 x i64> %v1 store <4 x i64> %sel, ptr %res ret void } + +define void @select_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v0, <4 x double> %v1 + store <4 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll index 8f25a6b..25c4f09 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll @@ -16,6 +16,20 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ret void } +define void @select_v16i8_imm_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: select_v16i8_imm_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, -256 +; CHECK-NEXT: vbitseli.b $vr1, $vr0, 1 +; CHECK-NEXT: vst $vr1, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %v0 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i8: ; CHECK: # %bb.0: @@ -32,6 +46,40 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + +define void @select_v16i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i16: ; CHECK: # %bb.0: @@ -49,6 +97,40 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + +define void @select_v8i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI7_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI7_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i32: ; CHECK: # %bb.0: @@ -65,13 +147,47 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v4i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v0, <4 x i32> %v1 + store <4 x i32> %sel, ptr %res + ret void +} + +define void @select_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI10_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %sel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %v0, <4 x float> %v1 + store <4 x float> %sel, ptr %res + ret void +} + define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI11_0) ; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret @@ -81,3 +197,20 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { store <2 x i64> %sel, ptr %res ret void } + +define void @select_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %sel = select <2 x i1> <i1 false, i1 true>, <2 x double> %v0, <2 x double> %v1 + store <2 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index e71f59c..cad684e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_afn_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB11_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_afn_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB13_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB13_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_fast_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB15_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB15_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_fast_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB16_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB16_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ret float %rt diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll new file mode 100644 index 0000000..c19e93d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O1 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s + +define i32 @pr134424(i64 %input_value, i32 %base_value, i1 %cond_flag1, i1 %cond_flag2, i1 %cond_flag3) { +; CHECK-LABEL: pr134424: +; CHECK: # %bb.0: # %for.body.us.preheader.i +; CHECK-NEXT: andi a3, a3, 1 +; CHECK-NEXT: andi a5, a2, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 14 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bnez a5, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %for.body.us.preheader.i +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: .LBB0_2: # %for.body.us.preheader.i +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: andi a4, a4, 1 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bnez a3, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %for.body.us.preheader.i +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: .LBB0_4: # %for.body.us.preheader.i +; CHECK-NEXT: vmsle.vi v0, v8, 0 +; CHECK-NEXT: sext.w a2, a2 +; CHECK-NEXT: bnez a4, .LBB0_6 +; CHECK-NEXT: # %bb.5: # %for.body.us.preheader.i +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: .LBB0_6: # %for.body.us.preheader.i +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a3, v8 +; CHECK-NEXT: sext.w a1, a1 +; CHECK-NEXT: bge a3, a2, .LBB0_11 +; CHECK-NEXT: # %bb.7: # %for.body.us.preheader.i +; CHECK-NEXT: bge a0, a1, .LBB0_12 +; CHECK-NEXT: .LBB0_8: # %for.body.us.preheader.i +; CHECK-NEXT: blt a3, a0, .LBB0_10 +; CHECK-NEXT: .LBB0_9: # %for.body.us.preheader.i +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_10: # %for.body.us.preheader.i +; CHECK-NEXT: sw a3, 0(zero) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_11: # %for.body.us.preheader.i +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: blt a0, a1, .LBB0_8 +; CHECK-NEXT: .LBB0_12: # %for.body.us.preheader.i +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bge a3, a0, .LBB0_9 +; CHECK-NEXT: j .LBB0_10 +for.body.us.preheader.i: + %partial_vector = insertelement <4 x i64> zeroinitializer, i64 %input_value, i64 1 + %comparison_vector = shufflevector <4 x i64> %partial_vector, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 1, i32 1> + %comparison_result = icmp sle <4 x i64> %comparison_vector, zeroinitializer + %selected_value1 = select i1 %cond_flag1, i32 %base_value, i32 1 + %selected_value2 = select i1 %cond_flag2, i32 %base_value, i32 1 + %selected_value3 = select i1 %cond_flag3, i32 %base_value, i32 1 + %bool_to_int = zext <4 x i1> %comparison_result to <4 x i32> + %extended_vector = shufflevector <4 x i32> %bool_to_int, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> + %vector_min = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %extended_vector) + %min1 = call i32 @llvm.smin.i32(i32 %vector_min, i32 %selected_value1) + %min2 = call i32 @llvm.smin.i32(i32 %selected_value2, i32 %selected_value3) + %final_min = call i32 @llvm.smin.i32(i32 %min1, i32 %min2) + store i32 %final_min, ptr null, align 4 + ret i32 0 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir new file mode 100644 index 0000000..aeab8f6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir @@ -0,0 +1,57 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=register-coalescer -o - %s | FileCheck %s + +--- +name: pr71023 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: pr71023 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x10, $v8, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF + ; CHECK-NEXT: undef [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_2:vrn8m1 = PseudoVMV_V_I_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_6:vrn8m1 = COPY undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2 + ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: early-clobber [[PseudoVMV_V_I_M1_]].sub_vrm1_0:vrn8m1 = PseudoVRGATHER_VI_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_0, [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSSEG6E8_V_M1_MASK [[PseudoVMV_V_I_M1_]].sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5, undef [[DEF]], killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1) + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $x10, $v8, $v10 + %0:gpr = IMPLICIT_DEF + %1:vrnov0 = PseudoVMV_V_I_M1 undef %1, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + %2:vrnov0 = IMPLICIT_DEF + undef %3.sub_vrm1_0:vrn6m1nov0 = COPY undef %1 + %3.sub_vrm1_3:vrn6m1nov0 = COPY %2 + %3.sub_vrm1_4:vrn6m1nov0 = COPY undef %1 + BNE undef %0, $x0, %bb.3 + PseudoBR %bb.1 + bb.1: + successors: %bb.3(0x40000000), %bb.2(0x40000000) + BNE killed undef %0, $x0, %bb.3 + PseudoBR %bb.2 + bb.2: + successors: %bb.3(0x80000000) + bb.3: + %4:vr = IMPLICIT_DEF + early-clobber %4:vr = PseudoVRGATHER_VI_M1 undef %4, killed %1, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + undef %5.sub_vrm1_0:vrn6m1 = COPY killed %4 + %5.sub_vrm1_5:vrn6m1 = COPY killed %2 + PseudoVSSEG6E8_V_M1_MASK killed %5, undef %0, killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1) + PseudoRET +... diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll index ae170d7..d949068 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -104,6 +104,31 @@ define void @memset_i32(ptr %dest, i8 %val, i32 %len) { ret void } +; CHECK-LABEL: memcpy_0: +; CHECK-NEXT: .functype memcpy_0 (i32, i32) -> () +; CHECK-NEXT: return +define void @memcpy_0(ptr %dest, ptr %src) { + call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0) + ret void +} + +; CHECK-LABEL: memmove_0: +; CHECK-NEXT: .functype memmove_0 (i32, i32) -> () +; CHECK-NEXT: return +define void @memmove_0(ptr %dest, ptr %src) { + call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0) + ret void +} + +; CHECK-LABEL: memset_0: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_0 (i32, i32) -> () +; BULK-MEM-NEXT: return +define void @memset_0(ptr %dest, i8 %val) { + call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 0, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -137,14 +162,8 @@ define void @memset_1(ptr %dest, i8 %val) { ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memcpy_1024(ptr %dest, ptr %src) { call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0) @@ -154,14 +173,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memmove_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memmove_1024(ptr %dest, ptr %src) { call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0) @@ -171,14 +184,8 @@ define void @memmove_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memset_1024: ; NO-BULK-MEM-NOT: memory.fill ; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memset_1024(ptr %dest, i8 %val) { call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1024, i1 0) @@ -201,17 +208,11 @@ define void @memset_1024(ptr %dest, i8 %val) { ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_src(ptr %dst) { %a = alloca [100 x i8] @@ -224,17 +225,11 @@ define void @memcpy_alloca_src(ptr %dst) { ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_dst(ptr %src) { %a = alloca [100 x i8] @@ -247,17 +242,11 @@ define void @memcpy_alloca_dst(ptr %src) { ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $1=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L3:[0-9]+]]=, $pop[[L2]] -; BULK-MEM-NEXT: br_if 0, $pop[[L3]] -; BULK-MEM-NEXT: i32.const $push[[L4:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L6:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memset_alloca(i8 %val) { %a = alloca [100 x i8] diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll index 0cf8493..d0206a3 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll @@ -110,6 +110,31 @@ define void @memset_i32(ptr %dest, i8 %val, i64 %len) { ret void } +; CHECK-LABEL: memcpy_0: +; CHECK-NEXT: .functype memcpy_0 (i64, i64) -> () +; CHECK-NEXT: return +define void @memcpy_0(ptr %dest, ptr %src) { + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0) + ret void +} + +; CHECK-LABEL: memmove_0: +; CHECK-NEXT: .functype memmove_0 (i64, i64) -> () +; CHECK-NEXT: return +define void @memmove_0(ptr %dest, ptr %src) { + call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0) + ret void +} + +; CHECK-LABEL: memset_0: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_0 (i64, i32) -> () +; BULK-MEM-NEXT: return +define void @memset_0(ptr %dest, i8 %val) { + call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 0, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -143,14 +168,8 @@ define void @memset_1(ptr %dest, i8 %val) { ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memcpy_1024(ptr %dest, ptr %src) { call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0) @@ -160,14 +179,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memmove_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memmove_1024(ptr %dest, ptr %src) { call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0) @@ -177,14 +190,8 @@ define void @memmove_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memset_1024: ; NO-BULK-MEM-NOT: memory.fill ; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memset_1024(ptr %dest, i8 %val) { call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1024, i1 0) @@ -207,17 +214,11 @@ define void @memset_1024(ptr %dest, i8 %val) { ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_src(ptr %dst) { %a = alloca [100 x i8] @@ -230,17 +231,11 @@ define void @memcpy_alloca_src(ptr %dst) { ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_dst(ptr %src) { %a = alloca [100 x i8] @@ -253,17 +248,11 @@ define void @memcpy_alloca_dst(ptr %src) { ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $1=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L2:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L3:[0-9]+]]=, $pop[[L2]] -; BULK-MEM-NEXT: br_if 0, $pop[[L3]] -; BULK-MEM-NEXT: i64.const $push[[L4:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L6:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memset_alloca(i8 %val) { %a = alloca [100 x i8] diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index cdf6bdd..caec02e 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -4765,8 +4765,8 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32 } declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>) -define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) { -; X64-LABEL: pr163023: +define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023_sext: ; X64: # %bb.0: ; X64-NEXT: kxnorw %k0, %k0, %k1 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -4774,7 +4774,7 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) { ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ; X64-NEXT: retq ; -; X86-LABEL: pr163023: +; X86-LABEL: pr163023_sext: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kxnorw %k0, %k0, %k1 @@ -4788,7 +4788,40 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) { %ofs = sext <16 x i32> %a1 to <16 x i64> %addr = add nuw <16 x i64> %addr.splat, %ofs %ptr = inttoptr <16 x i64> %addr to <16 x ptr> - %gather = tail call fastcc <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + ret <16 x i32> %gather +} + +define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023_zext: +; X64: # %bb.0: +; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-NEXT: kxnorw %k0, %k0, %k1 +; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-NEXT: kxnorw %k0, %k0, %k2 +; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2} +; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1} +; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0 +; X64-NEXT: retq +; +; X86-LABEL: pr163023_zext: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kxnorw %k0, %k0, %k1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl + %addr.p = ptrtoint ptr %a0 to i64 + %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 + %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer + %ofs = zext <16 x i32> %a1 to <16 x i64> + %addr = add nuw <16 x i64> %addr.splat, %ofs + %ptr = inttoptr <16 x i64> %addr to <16 x ptr> + %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ret <16 x i32> %gather } diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 42617c1..18588aa 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr @@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 3eda077..475faf9 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -177,6 +177,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: static constexpr bool is_iterable = true; // CHECK-NEXT: }; // CHECK-NEXT: } // namespace llvm +// CHECK-EMPTY: // CHECK-NEXT: #endif // LLVM_Tdl_INC diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index a25197c..ccc0944 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -150,6 +150,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: static constexpr bool is_iterable = true; // CHECK-NEXT: }; // CHECK-NEXT: } // namespace llvm +// CHECK-EMPTY: // CHECK-NEXT: #endif // LLVM_Tdl_INC // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll new file mode 100644 index 0000000..e4322cf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll @@ -0,0 +1,588 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s + +define i32 @umax_phi_used_outside(ptr %src, i32 %n) { +; CHECK-LABEL: define i32 @umax_phi_used_outside( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = tail call i32 @llvm.umax.i32(i32 [[MAX]], i32 [[L_EXT]]) +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_LCSSA:%.*]] = phi i32 [ [[MAX]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %spec.select, %loop ] + %gep.src = getelementptr inbounds i8, ptr %src, i32 %iv + %l = load i8, ptr %gep.src + %l.ext = zext i8 %l to i32 + %spec.select = tail call i32 @llvm.umax.i32(i32 %max, i32 %l.ext) + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max +} + +define i32 @chained_smax(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_smax( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK: [[PRED_LOAD_IF3]]: +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK: [[PRED_LOAD_CONTINUE4]]: +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_IF5]]: +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_CONTINUE6]]: +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[TMP28]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max) + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %l, i32 %max.1) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + +define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_invariant_store_user( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 %max.next, ptr %dst, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_same_addr2(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr2( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 0, ptr [[DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 0, ptr %dst, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr3( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 0, ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 0, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 %max.next, ptr %dst, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @smax_with_multiple_invariant_store_user_different_addr(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, i64 %n) { +; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_different_addr( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[DST_2:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST_2]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + store i32 %max.next, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + store i32 %max.next, ptr %dst.2, align 4 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define i32 @chained_instructions_feeding_max1(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_instructions_feeding_max1( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MAX]], [[L]] +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 [[L]]) +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %add = add i32 %max, %l + %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 %l) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + +define i32 @chained_instructions_feeding_max2(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_instructions_feeding_max2( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[MAX_1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[MAX]]) +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[L]], [[MAX_1]] +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 100) +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max) + %l = load i32, ptr %gep.src, align 4 + %add = add i32 %l, %max.1 + %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 100) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + + +define i32 @test_predicated_smin(ptr %src) { +; CHECK-LABEL: define i32 @test_predicated_smin( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP3:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP3]]) +; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[PREDPHI]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[TMP6]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %min = phi i32 [ 0, %entry ], [ %min.merge, %loop.latch ] + %gep.src = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 4 + %c = fcmp une float %l, 0.0 + br i1 %c, label %then, label %loop.latch + +then: + %div = fdiv float %l, 3.0 + %div.i32 = fptosi float %div to i32 + %min.next = tail call i32 @llvm.smin.i32(i32 %min, i32 %div.i32) + br label %loop.latch + +loop.latch: + %min.merge = phi i32 [ %min.next, %then ], [ %min, %loop.header ] + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 111 + br i1 %ec, label %exit, label %loop.header + +exit: + ret i32 %min.merge +} + +define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) { +; CHECK-LABEL: define i32 @smax_reduction_multiple_incoming( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_HEADER_PREHEADER:.*]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: [[IV_PH:%.*]] = phi i32 [ 10, %[[ELSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[MAX_PH:%.*]] = phi i32 [ 5, %[[ELSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]] +; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[IV_PH]], [[N_VEC]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MAX_PH]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IV_PH]], [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[MAX_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] +; +entry: + br i1 %cond, label %loop.header, label %else + +else: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ 10, %else ], [ %iv.next, %loop.header ] + %max = phi i32 [ 0, %entry ], [ 5, %else ], [ %max.next, %loop.header ] + %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l) + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret i32 %max.next +} diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll index eabb697..3b77e49 100644 --- a/llvm/test/Transforms/SROA/slice-width.ll +++ b/llvm/test/Transforms/SROA/slice-width.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind ; This tests that allocas are not split into slices that are not byte width multiple +;. +; CHECK: @foo_copy_source = external constant %union.Foo +; CHECK: @i64_sink = global i64 0 +;. define void @no_split_on_non_byte_width(i32) { ; CHECK-LABEL: @no_split_on_non_byte_width( ; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8 @@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr) ; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte ; vector store, hence accidentally putting gibberish onto the stack. -define i32 @memcpy_vec3float_widening(ptr %x) { +define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 { ; CHECK-LABEL: @memcpy_vec3float_widening( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> -; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef +; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4 @@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() { %L2 = load i1, ptr %A ret i1 %L2 } +!0 = !{!"function_entry_count", i32 10} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"unknown", !"sroa"} +;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} ; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index de83a0d..4c08b57 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -386,7 +386,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, // Make a DWARF transformer object and populate the ranges of the code // so we don't end up adding invalid functions to GSYM data. - DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites); + bool IsMachO = dyn_cast<object::MachOObjectFile>(&Obj) != nullptr; + + DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites, IsMachO); if (!TextRanges.empty()) Gsym.SetValidTextRanges(TextRanges); diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp index 33f53de..d560073 100644 --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -4899,3 +4899,189 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) { for (const auto &Line : ExpectedDumpLines) EXPECT_TRUE(DumpStr.find(Line) != std::string::npos); } + +TEST(GSYMTest, TestUnableToLocateDWO) { + // Test that llvm-gsymutil will not produce "uanble to locate DWO file" for + // Apple binaries. Apple uses DW_AT_GNU_dwo_id for non split DWARF purposes + // and this makes llvm-gsymutil create warnings and errors. + // + // 0x0000000b: DW_TAG_compile_unit + // DW_AT_name ("main.cpp") + // DW_AT_language (DW_LANG_C) + // DW_AT_GNU_dwo_id (0xfffffffe) + StringRef yamldata = R"( + debug_str: + - '' + - main.cpp + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_udata + - Attribute: DW_AT_GNU_dwo_id + Form: DW_FORM_data4 + debug_info: + - Length: 0x11 + Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x1 + - Value: 0x2 + - Value: 0xFFFFFFFE + )"; + auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr<DWARFContext> DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + std::string errors; + raw_string_ostream OS(errors); + OutputAggregator OSAgg(&OS); + GsymCreator GC; + // Make a DWARF transformer that is MachO (Apple) to avoid warnings about + // not finding DWO files. + DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded()); + + // Make sure this warning is not in the binary + std::string warn("warning: Unable to retrieve DWO .debug_info section for"); + EXPECT_TRUE(errors.find(warn) == std::string::npos); +} + +TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) { + // Test that if llvm-gsymutil finds a line table for a compile unit and if + // there are no matching entries for a function in that compile unit, that + // it doesn't print out a error saying that a DIE has an invalid file index + // if there is no DW_AT_decl_file attribute. + // + // 0x0000000b: DW_TAG_compile_unit + // DW_AT_name ("main.cpp") + // DW_AT_language (DW_LANG_C) + // DW_AT_stmt_list (0x00000000) + // + // 0x00000015: DW_TAG_subprogram + // DW_AT_name ("foo") + // DW_AT_low_pc (0x0000000000001000) + // DW_AT_high_pc (0x0000000000001050) + // + // 0x0000002a: NULL + // + // Line table that has entries, but none that match "foo": + // + // Address Line Column File ISA Discriminator OpIndex Flags + // ------------------ ------ ------ ------ --- ------------- ------- ----- + // 0x0000000000002000 10 0 1 0 0 0 is_stmt + // 0x0000000000002050 13 0 1 0 0 0 is_stmt + + StringRef yamldata = R"( + debug_str: + - '' + - main.cpp + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_udata + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + - Code: 0x2 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_addr + debug_info: + - Length: 0x27 + Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x1 + - Value: 0x2 + - Value: 0x0 + - AbbrCode: 0x2 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: foo + - Value: 0x1000 + - Value: 0x1050 + - AbbrCode: 0x0 + debug_line: + - Length: 58 + Version: 2 + PrologueLength: 31 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + Files: + - Name: main.cpp + DirIdx: 0 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 8192 + - Opcode: DW_LNS_advance_line + SData: 9 + Data: 0 + - Opcode: DW_LNS_copy + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 80 + - Opcode: DW_LNS_advance_line + SData: 3 + Data: 0 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 0 + )"; + auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata); + ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded()); + std::unique_ptr<DWARFContext> DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + ASSERT_TRUE(DwarfContext.get() != nullptr); + std::string errors; + raw_string_ostream OS(errors); + OutputAggregator OSAgg(&OS); + GsymCreator GC; + DwarfTransformer DT(*DwarfContext, GC); + const uint32_t ThreadCount = 1; + ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded()); + ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded()); + + // Make sure this warning is not in the binary + std::string error_str("error: function DIE at 0x00000015 has an invalid file " + "index 4294967295 in its DW_AT_decl_file attribute"); + EXPECT_TRUE(errors.find(error_str) == std::string::npos); +} diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index b4d816e..3c6ff11 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -266,10 +266,9 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { return; StringRef Lang = DirLang.getName(); + IncludeGuardEmitter IncGuard(OS, (Twine("LLVM_") + Lang + "_INC").str()); - OS << "#ifndef LLVM_" << Lang << "_INC\n"; - OS << "#define LLVM_" << Lang << "_INC\n"; - OS << "\n#include \"llvm/ADT/ArrayRef.h\"\n"; + OS << "#include \"llvm/ADT/ArrayRef.h\"\n"; if (DirLang.hasEnableBitmaskEnumInNamespace()) OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n"; @@ -370,7 +369,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { OS << "};\n"; } LlvmNS.close(); - OS << "#endif // LLVM_" << Lang << "_INC\n"; } // Given a list of spellings (for a given clause/directive), order them diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index a5c5426..3f8be5e 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -1310,14 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll Transforms/SimpleLoopUnswitch/trivial-unswitch.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll -Transforms/SROA/phi-gep.ll -Transforms/SROA/scalable-vectors-with-known-vscale.ll -Transforms/SROA/select-gep.ll -Transforms/SROA/select-load.ll -Transforms/SROA/slice-width.ll -Transforms/SROA/vector-conversion.ll -Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll -Transforms/SROA/vector-promotion.ll Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll Transforms/StructurizeCFG/hoist-zerocost.ll diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index c477c6c..dcc1ef9 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -315,7 +315,8 @@ bool mlir::linalg::detail::isContractionBody( Value yielded = getSourceSkipUnary(terminator->getOperand(0)); Operation *reductionOp = yielded.getDefiningOp(); - if (reductionOp->getNumResults() != 1 || reductionOp->getNumOperands() != 2) { + if (!reductionOp || reductionOp->getNumResults() != 1 || + reductionOp->getNumOperands() != 2) { errs << "expected reduction op to be binary"; return false; } diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 618ba34..66cae5c 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } { } -> tensor<1x1x4xf32> return } + + func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) { + %0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d2, d1)>, + affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>) + outs(%arg2 : tensor<128x128xi32>) { + ^bb0(%in: i32, %in_0: i32, %out: i32): + linalg.yield %out : i32 + } -> tensor<128x128xi32> + return + } } // ----- diff --git a/orc-rt/include/orc-rt/ExecutorAddress.h b/orc-rt/include/orc-rt/ExecutorAddress.h index cc7bbf5..6ec583f 100644 --- a/orc-rt/include/orc-rt/ExecutorAddress.h +++ b/orc-rt/include/orc-rt/ExecutorAddress.h @@ -204,6 +204,9 @@ struct ExecutorAddrRange { constexpr bool contains(ExecutorAddr Addr) const noexcept { return Start <= Addr && Addr < End; } + constexpr bool contains(const ExecutorAddrRange &Other) const noexcept { + return (Other.Start >= Start && Other.End <= End); + } constexpr bool overlaps(const ExecutorAddrRange &Other) const noexcept { return !(Other.End <= Start || End <= Other.Start); } diff --git a/orc-rt/unittests/ExecutorAddressTest.cpp b/orc-rt/unittests/ExecutorAddressTest.cpp index 98074a7..2e04901 100644 --- a/orc-rt/unittests/ExecutorAddressTest.cpp +++ b/orc-rt/unittests/ExecutorAddressTest.cpp @@ -97,10 +97,16 @@ TEST(ExecutorAddrTest, AddrRanges) { EXPECT_FALSE(R1.contains(A0)); EXPECT_FALSE(R1.contains(A2)); + EXPECT_TRUE(R3.contains(R0)); // True for singleton range at start. + EXPECT_TRUE(R3.contains(R1)); // True for singleton range at end. + EXPECT_FALSE(R3.contains(R2)); // False for non-overlaping singleton range. + EXPECT_FALSE(R3.contains(R4)); // False for overlapping, uncontained range. + EXPECT_FALSE(R1.overlaps(R0)); EXPECT_FALSE(R1.overlaps(R2)); EXPECT_TRUE(R1.overlaps(R3)); EXPECT_TRUE(R1.overlaps(R4)); + EXPECT_TRUE(R3.overlaps(R4)); } TEST(ExecutorAddrTest, Hashable) { diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index c83ab59..74d632b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1412,6 +1412,13 @@ td_library( ) td_library( + name = "InferStridedMetadataInterfaceTdFiles", + srcs = ["include/mlir/Interfaces/InferStridedMetadataInterface.td"], + includes = ["include"], + deps = [":OpBaseTdFiles"], +) + +td_library( name = "InferTypeOpInterfaceTdFiles", srcs = ["include/mlir/Interfaces/InferTypeOpInterface.td"], includes = ["include"], @@ -4159,6 +4166,7 @@ cc_library( ":MathToLibm", ":MathToROCDL", ":MathToSPIRV", + ":MathToXeVM", ":MemRefToEmitC", ":MemRefToLLVM", ":MemRefToSPIRV", @@ -7049,6 +7057,33 @@ cc_library( ) cc_library( + name = "MathToXeVM", + srcs = glob([ + "lib/Conversion/MathToXeVM/*.cpp", + ]), + hdrs = glob([ + "include/mlir/Conversion/MathToXeVM/*.h", + ]), + includes = [ + "include", + "lib/Conversion/MathToXeVM", + ], + deps = [ + ":ArithAttrToLLVMConversion", + ":ArithDialect", + ":ConversionPassIncGen", + ":IR", + ":LLVMCommonConversion", + ":LLVMDialect", + ":MathDialect", + ":Pass", + ":Transforms", + ":XeVMDialect", + "//llvm:Support", + ], +) + +cc_library( name = "FuncToEmitC", srcs = glob([ "lib/Conversion/FuncToEmitC/*.cpp", @@ -7622,6 +7657,30 @@ cc_library( ], ) +gentbl_cc_library( + name = "InferStridedMetadataInterfaceIncGen", + tbl_outs = { + "include/mlir/Interfaces/InferStridedMetadataInterface.h.inc": ["-gen-op-interface-decls"], + "include/mlir/Interfaces/InferStridedMetadataInterface.cpp.inc": ["-gen-op-interface-defs"], + }, + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Interfaces/InferStridedMetadataInterface.td", + deps = [":InferStridedMetadataInterfaceTdFiles"], +) + +cc_library( + name = "InferStridedMetadataInterface", + srcs = ["lib/Interfaces/InferStridedMetadataInterface.cpp"], + hdrs = ["include/mlir/Interfaces/InferStridedMetadataInterface.h"], + includes = ["include"], + deps = [ + ":IR", + ":InferIntRangeInterface", + ":InferStridedMetadataInterfaceIncGen", + "//llvm:Support", + ], +) + td_library( name = "DataLayoutInterfacesTdFiles", srcs = ["include/mlir/Interfaces/DataLayoutInterfaces.td"], @@ -8550,9 +8609,11 @@ cc_library( ":CallOpInterfaces", ":ControlFlowInterfaces", ":DataLayoutInterfaces", + ":DialectUtils", ":FunctionInterfaces", ":IR", ":InferIntRangeInterface", + ":InferStridedMetadataInterface", ":LoopLikeInterface", ":Pass", ":SideEffectInterfaces", @@ -12474,6 +12535,7 @@ cc_library( ":IR", ":InferIntRangeCommon", ":InferIntRangeInterface", + ":InferStridedMetadataInterface", ":InferTypeOpInterface", ":InliningUtils", ":Pass", @@ -12695,6 +12757,7 @@ td_library( ":ArithOpsTdFiles", ":CastInterfacesTdFiles", ":ControlFlowInterfacesTdFiles", + ":InferStridedMetadataInterfaceTdFiles", ":MemOpInterfacesTdFiles", ":MemorySlotInterfacesTdFiles", ":OpBaseTdFiles", @@ -12785,6 +12848,7 @@ cc_library( ":IR", ":InferIntRangeCommon", ":InferIntRangeInterface", + ":InferStridedMetadataInterface", ":InferTypeOpInterface", ":InliningUtils", ":MemOpInterfaces", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel index b00e8f2..d8fcb53 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel @@ -1,4 +1,5 @@ load("//llvm:lit_test.bzl", "lit_test") +load("//llvm:targets.bzl", "llvm_targets") licenses(["notice"]) @@ -15,6 +16,9 @@ package(default_visibility = ["//visibility:public"]) ) for src in glob( include = ["**/*.mlir"], - exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"], + exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"] + ( + # MathToXeVM needs SPIRV; see MathToXeVM/lit.local.cfg + ["MathToXeVM/**"] if "SPIRV" not in llvm_targets else [] + ), ) ] |