diff options
88 files changed, 2459 insertions, 1100 deletions
diff --git a/clang/docs/PointerAuthentication.rst b/clang/docs/PointerAuthentication.rst index 96eb498..7e65f4b 100644 --- a/clang/docs/PointerAuthentication.rst +++ b/clang/docs/PointerAuthentication.rst @@ -592,6 +592,36 @@ The result value is never zero and always within range for both the This can be used in constant expressions. +``ptrauth_type_discriminator`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + ptrauth_type_discriminator(type) + +Compute the constant discriminator derived from the given type, as is computed +for automatically type diversified schemas. + +``type`` must be a type. The result has the type ``ptrauth_extra_data_t``. + +This can be used in constant expressions. + +``ptrauth_function_pointer_type_discriminator`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + ptrauth_function_pointer_type_discriminator(function_type) + +Compute the constant discriminator derived from the provided function type, for +use in contexts where the default function authentication schema. If function +pointer type diversity is enabled, this is equivalent to +`ptrauth_type_discriminator(function_type)`, if it is not enabled this is `0`. + +``function_type`` must be a function type. The result has the type ``ptrauth_extra_data_t``. + +This can be used in constant expressions. + ``ptrauth_strip`` ^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index edb872c..40b3e02 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -191,6 +191,8 @@ C23 Feature Support - Added ``FLT_SNAN``, ``DBL_SNAN``, and ``LDBL_SNAN`` to Clang's ``<float.h>`` header in C23 and later modes. This implements `WG14 N2710 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2710.htm>`_. +- Fixed accepting as compatible unnamed tag types with the same fields within + the same translation unit but from different types. Non-comprehensive list of changes in this release ------------------------------------------------- @@ -513,6 +515,7 @@ X86 Support driver. - Remove `[no-]evex512` feature request from intrinsics and builtins. - Change features `avx10.x-[256,512]` to `avx10.x`. +- `-march=wildcatlake` is now supported. Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a8b41ba..3603e9cd 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11581,6 +11581,12 @@ QualType ASTContext::mergeTagDefinitions(QualType LHS, QualType RHS) { if (LangOpts.CPlusPlus || !LangOpts.C23) return {}; + // Nameless tags are comparable only within outer definitions. At the top + // level they are not comparable. + const TagDecl *LTagD = LHS->castAsTagDecl(), *RTagD = RHS->castAsTagDecl(); + if (!LTagD->getIdentifier() || !RTagD->getIdentifier()) + return {}; + // C23, on the other hand, requires the members to be "the same enough", so // we use a structural equivalence check. StructuralEquivalenceContext::NonEquivalentDeclSet NonEquivalentDecls; diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 1557346..b17cd6f 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1763,19 +1763,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, // another anonymous structure or union, respectively, if their members // fulfill the preceding requirements. ... Otherwise, the structure, union, // or enumerated types are incompatible. - - // Note: "the same tag" refers to the identifier for the structure; two - // structures without names are not compatible within a TU. In C23, if either - // declaration has no name, they're not equivalent. However, the paragraph - // after the bulleted list goes on to talk about compatibility of anonymous - // structure and union members, so this prohibition only applies to top-level - // declarations; if either declaration is not a member, they cannot be - // compatible. - if (Context.LangOpts.C23 && (!D1->getIdentifier() || !D2->getIdentifier()) && - (!D1->getDeclContext()->isRecord() || !D2->getDeclContext()->isRecord())) - return false; - - // Otherwise, check the names for equivalence. if (!NameIsStructurallyEquivalent(*D1, *D2)) return false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 187815c..ef4973c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -625,6 +625,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_ArrowlakeS: case CK_Lunarlake: case CK_Pantherlake: + case CK_Wildcatlake: case CK_Sierraforest: case CK_Grandridge: case CK_Graniterapids: @@ -1613,6 +1614,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const { case CK_ArrowlakeS: case CK_Lunarlake: case CK_Pantherlake: + case CK_Wildcatlake: case CK_Sierraforest: case CK_Grandridge: case CK_Graniterapids: diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index cfd48a2..5ba64dd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -536,7 +536,7 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) { mlir::Block *currBlock = builder.getBlock(); mlir::Block *labelBlock = currBlock; - if (!currBlock->empty()) { + if (!currBlock->empty() || currBlock->isEntryBlock()) { { mlir::OpBuilder::InsertionGuard guard(builder); labelBlock = builder.createBlock(builder.getBlock()->getParent()); diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index f902ca1..ad28f06 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define ptrauth_type_discriminator(__type) \ __builtin_ptrauth_type_discriminator(__type) +/* Compute the constant discriminator used by Clang to sign pointers with the + given C function pointer type. + + A call to this function is an integer constant expression. */ +#if __has_feature(ptrauth_function_pointer_type_discrimination) +#define ptrauth_function_pointer_type_discriminator(__type) \ + __builtin_ptrauth_type_discriminator(__type) +#else +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) +#endif + /* Compute a signature for the given pair of pointer-sized values. The order of the arguments is significant. @@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; }) #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0) +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) #define ptrauth_sign_generic_data(__value, __data) \ ({ \ diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 319aaca..c1a5000 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -26,6 +26,7 @@ bool tryToFindPtrOrigin( const Expr *E, bool StopAtFirstRefCountedObj, std::function<bool(const clang::CXXRecordDecl *)> isSafePtr, std::function<bool(const clang::QualType)> isSafePtrType, + std::function<bool(const clang::Decl *)> isSafeGlobalDecl, std::function<bool(const clang::Expr *, bool)> callback) { while (E) { if (auto *DRE = dyn_cast<DeclRefExpr>(E)) { @@ -34,6 +35,8 @@ bool tryToFindPtrOrigin( auto IsImmortal = safeGetName(VD) == "NSApp"; if (VD->hasGlobalStorage() && (IsImmortal || QT.isConstQualified())) return callback(E, true); + if (VD->hasGlobalStorage() && isSafeGlobalDecl(VD)) + return callback(E, true); } } if (auto *tempExpr = dyn_cast<MaterializeTemporaryExpr>(E)) { @@ -71,9 +74,11 @@ bool tryToFindPtrOrigin( } if (auto *Expr = dyn_cast<ConditionalOperator>(E)) { return tryToFindPtrOrigin(Expr->getTrueExpr(), StopAtFirstRefCountedObj, - isSafePtr, isSafePtrType, callback) && + isSafePtr, isSafePtrType, isSafeGlobalDecl, + callback) && tryToFindPtrOrigin(Expr->getFalseExpr(), StopAtFirstRefCountedObj, - isSafePtr, isSafePtrType, callback); + isSafePtr, isSafePtrType, isSafeGlobalDecl, + callback); } if (auto *cast = dyn_cast<CastExpr>(E)) { if (StopAtFirstRefCountedObj) { diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h index 3a009d6..9fff456 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h @@ -56,6 +56,7 @@ bool tryToFindPtrOrigin( const clang::Expr *E, bool StopAtFirstRefCountedObj, std::function<bool(const clang::CXXRecordDecl *)> isSafePtr, std::function<bool(const clang::QualType)> isSafePtrType, + std::function<bool(const clang::Decl *)> isSafeGlobalDecl, std::function<bool(const clang::Expr *, bool)> callback); /// For \p E referring to a ref-countable/-counted pointer/reference we return diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp index 9585ceb..791e709 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp @@ -29,12 +29,12 @@ namespace { class RawPtrRefCallArgsChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { BugType Bug; - mutable BugReporter *BR; TrivialFunctionAnalysis TFA; EnsureFunctionAnalysis EFA; protected: + mutable BugReporter *BR; mutable std::optional<RetainTypeChecker> RTC; public: @@ -46,6 +46,7 @@ public: virtual bool isSafePtr(const CXXRecordDecl *Record) const = 0; virtual bool isSafePtrType(const QualType type) const = 0; virtual bool isSafeExpr(const Expr *) const { return false; } + virtual bool isSafeDecl(const Decl *) const { return false; } virtual const char *ptrKind() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -214,6 +215,7 @@ public: Arg, /*StopAtFirstRefCountedObj=*/true, [&](const clang::CXXRecordDecl *Record) { return isSafePtr(Record); }, [&](const clang::QualType T) { return isSafePtrType(T); }, + [&](const clang::Decl *D) { return isSafeDecl(D); }, [&](const clang::Expr *ArgOrigin, bool IsSafe) { if (IsSafe) return true; @@ -479,6 +481,11 @@ public: isa<ObjCMessageExpr>(E); } + bool isSafeDecl(const Decl *D) const final { + // Treat NS/CF globals in system header as immortal. + return BR->getSourceManager().isInSystemHeader(D->getLocation()); + } + const char *ptrKind() const final { return "unretained"; } }; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp index dd9701f..c13df479 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp @@ -166,10 +166,10 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, class RawPtrRefLocalVarsChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> { BugType Bug; - mutable BugReporter *BR; EnsureFunctionAnalysis EFA; protected: + mutable BugReporter *BR; mutable std::optional<RetainTypeChecker> RTC; public: @@ -180,6 +180,7 @@ public: virtual bool isSafePtr(const CXXRecordDecl *) const = 0; virtual bool isSafePtrType(const QualType) const = 0; virtual bool isSafeExpr(const Expr *) const { return false; } + virtual bool isSafeDecl(const Decl *) const { return false; } virtual const char *ptrKind() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -288,6 +289,7 @@ public: return isSafePtr(Record); }, [&](const clang::QualType Type) { return isSafePtrType(Type); }, + [&](const clang::Decl *D) { return isSafeDecl(D); }, [&](const clang::Expr *InitArgOrigin, bool IsSafe) { if (!InitArgOrigin || IsSafe) return true; @@ -443,6 +445,10 @@ public: return ento::cocoa::isCocoaObjectRef(E->getType()) && isa<ObjCMessageExpr>(E); } + bool isSafeDecl(const Decl *D) const final { + // Treat NS/CF globals in system header as immortal. + return BR->getSourceManager().isInSystemHeader(D->getLocation()); + } const char *ptrKind() const final { return "unretained"; } }; diff --git a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h index 1e44de8..d55b3ab 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h @@ -34,6 +34,8 @@ void os_log_msg(os_log_t oslog, os_log_type_t type, const char *msg, ...); typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStringRef; +extern CFStringRef const kCFURLTagNamesKey; + #ifdef __OBJC__ @class NSString; @interface SystemObject { @@ -41,4 +43,8 @@ typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStrin CFStringRef cf_string; } @end + +typedef NSString *NSNotificationName; +extern "C" NSNotificationName NSApplicationDidBecomeActiveNotification; + #endif diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm index 72ba05e..f49e7bd 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm @@ -1,8 +1,11 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedLocalVarsChecker -verify %s #import "objc-mock-types.h" +#import "mock-system-header.h" void someFunction(); +extern "C" CFStringRef LocalGlobalCFString; +extern "C" NSString *LocalGlobalNSString; namespace raw_ptr { void foo() { @@ -547,6 +550,29 @@ void foo() { } // autoreleased +namespace ns_global { + +void consumeCFString(CFStringRef); +void consumeNSString(NSString *); + +void cf() { + auto *str = kCFURLTagNamesKey; + consumeCFString(str); + auto *localStr = LocalGlobalCFString; + // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}} + consumeCFString(localStr); +} + +void ns() { + auto *str = NSApplicationDidBecomeActiveNotification; + consumeNSString(str); + auto *localStr = LocalGlobalNSString; + // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}} + consumeNSString(localStr); +} + +} + bool doMoreWorkOpaque(OtherObj*); SomeObj* provide(); diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm new file mode 100644 index 0000000..5c78b21 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm @@ -0,0 +1,18 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedCallArgsChecker -verify %s + +#import "mock-types.h" +#import "mock-system-header.h" + +void consumeCFString(CFStringRef); +extern "C" CFStringRef LocalGlobalCFString; +void consumeNSString(NSString *); +extern "C" NSString *LocalGlobalNSString; + +void foo() { + consumeCFString(kCFURLTagNamesKey); + consumeCFString(LocalGlobalCFString); + // expected-warning@-1{{Call argument is unretained and unsafe}} + consumeNSString(NSApplicationDidBecomeActiveNotification); + consumeNSString(LocalGlobalNSString); + // expected-warning@-1{{Call argument is unretained and unsafe}} +} diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c index 3748375..113ecf7 100644 --- a/clang/test/C/C23/n3037.c +++ b/clang/test/C/C23/n3037.c @@ -30,11 +30,24 @@ void func2(PRODUCT(int, SUM(float, double)) y) { // c17-warning {{declaration of struct foop { struct { int x; }; }; // c17-note {{previous definition is here}} struct foop { struct { int x; }; }; // c17-error {{redefinition of 'foop'}} +// Test the field lookup compatibility isn't sufficient, the structure of types should be compatible. +struct AnonymousStructNotMatchingFields { // c17-note {{previous definition is here}} + struct { // c23-note {{field has name '' here}} + int x; + }; +}; +struct AnonymousStructNotMatchingFields { // c23-error {{type 'struct AnonymousStructNotMatchingFields' has incompatible definitions}} \ + c17-error {{redefinition of 'AnonymousStructNotMatchingFields'}} + int x; // c23-note {{field has name 'x' here}} +}; + union barp { int x; float y; }; // c17-note {{previous definition is here}} union barp { int x; float y; }; // c17-error {{redefinition of 'barp'}} typedef struct q { int x; } q_t; // c17-note 2 {{previous definition is here}} typedef struct q { int x; } q_t; // c17-error {{redefinition of 'q'}} \ c17-error-re {{typedef redefinition with different types ('struct (unnamed struct at {{.*}})' vs 'struct q')}} +typedef struct { int x; } untagged_q_t; // both-note {{previous definition is here}} +typedef struct { int x; } untagged_q_t; // both-error {{typedef redefinition with different types}} void func3(void) { struct S { int x; }; // c17-note {{previous definition is here}} struct T { struct S s; }; // c17-note {{previous definition is here}} @@ -389,13 +402,40 @@ void nontag_both_in_params(struct { int i; } Arg1, struct { int i; } Arg2) { _Static_assert(0 == _Generic(__typeof__(Arg1), __typeof__(Arg2) : 1, default : 0)); // both-warning {{passing a type argument as the first operand to '_Generic' is a C2y extension}} } -struct InnerAnonStruct { +struct InnerUnnamedStruct { struct { int i; } untagged; -} inner_anon_tagged; +} inner_unnamed_tagged; +_Static_assert(0 == _Generic(inner_unnamed_tagged.untagged, struct { int i; } : 1, default : 0)); -_Static_assert(0 == _Generic(inner_anon_tagged.untagged, struct { int i; } : 1, default : 0)); +struct InnerUnnamedStruct_same { + struct { + int i; + } untagged; +}; +struct InnerUnnamedStruct_differentNaming { + struct { + int i; + } untaggedDifferent; +}; +struct InnerUnnamedStruct_differentShape { + float x; + struct { + int i; + } untagged; + int y; +}; +void compare_unnamed_struct_from_different_outer_type( + struct InnerUnnamedStruct sameOuterType, + struct InnerUnnamedStruct_same matchingType, + struct InnerUnnamedStruct_differentNaming differentFieldName, + struct InnerUnnamedStruct_differentShape differentType) { + inner_unnamed_tagged.untagged = sameOuterType.untagged; + inner_unnamed_tagged.untagged = matchingType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} + inner_unnamed_tagged.untagged = differentFieldName.untaggedDifferent; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} + inner_unnamed_tagged.untagged = differentType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}} +} // Test the same thing with enumerations (test for unions is omitted because // unions and structures are both RecordDecl objects, whereas EnumDecl is not). diff --git a/clang/test/CIR/CodeGen/goto.cpp b/clang/test/CIR/CodeGen/goto.cpp index 48cb44e..257c255 100644 --- a/clang/test/CIR/CodeGen/goto.cpp +++ b/clang/test/CIR/CodeGen/goto.cpp @@ -205,6 +205,8 @@ extern "C" void case_follow_label(int v) { // CIR: cir.func dso_local @case_follow_label // CIR: cir.switch // CIR: cir.case(equal, [#cir.int<1> : !s32i]) { +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "label" // CIR: cir.case(equal, [#cir.int<2> : !s32i]) { // CIR: cir.call @action1() @@ -215,9 +217,11 @@ extern "C" void case_follow_label(int v) { // LLVM: define dso_local void @case_follow_label // LLVM: switch i32 {{.*}}, label %[[SWDEFAULT:.*]] [ -// LLVM: i32 1, label %[[LABEL:.*]] +// LLVM: i32 1, label %[[CASE1:.*]] // LLVM: i32 2, label %[[CASE2:.*]] // LLVM: ] +// LLVM: [[CASE1]]: +// LLVM: br label %[[LABEL:.*]] // LLVM: [[LABEL]]: // LLVM: br label %[[CASE2]] // LLVM: [[CASE2]]: @@ -303,3 +307,24 @@ extern "C" void default_follow_label(int v) { // OGCG: br label %label // OGCG: sw.epilog: // OGCG: ret void + +void g3() { +label: + goto label; +} + +// CIR: cir.func dso_local @_Z2g3v +// CIR: cir.br ^bb1 +// CIR: ^bb1: +// CIR: cir.label "label" +// CIR: cir.goto "label" + +// LLVM: define dso_local void @_Z2g3v() +// LLVM: br label %1 +// LLVM: 1: +// LLVM: br label %1 + +// OGCG: define dso_local void @_Z2g3v() +// OGCG: br label %label +// OGCG: label: +// OGCG: br label %label diff --git a/clang/test/CIR/CodeGen/label.c b/clang/test/CIR/CodeGen/label.c index a050094..f5345ef 100644 --- a/clang/test/CIR/CodeGen/label.c +++ b/clang/test/CIR/CodeGen/label.c @@ -11,10 +11,14 @@ labelA: } // CIR: cir.func no_proto dso_local @label +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "labelA" // CIR: cir.return // LLVM:define dso_local void @label +// LLVM: br label %1 +// LLVM: 1: // LLVM: ret void // OGCG: define dso_local void @label @@ -29,15 +33,19 @@ labelC: } // CIR: cir.func no_proto dso_local @multiple_labels -// CIR: cir.label "labelB" // CIR: cir.br ^bb1 -// CIR: ^bb1: // pred: ^bb0 +// CIR: ^bb1: +// CIR: cir.label "labelB" +// CIR: cir.br ^bb2 +// CIR: ^bb2: // CIR: cir.label "labelC" // CIR: cir.return // LLVM: define dso_local void @multiple_labels() // LLVM: br label %1 // LLVM: 1: +// LLVM: br label %2 +// LLVM: 2: // LLVM: ret void // OGCG: define dso_local void @multiple_labels @@ -56,6 +64,8 @@ labelD: // CIR: cir.func dso_local @label_in_if // CIR: cir.if {{.*}} { +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "labelD" // CIR: [[LOAD:%.*]] = cir.load align(4) [[COND:%.*]] : !cir.ptr<!s32i>, !s32i // CIR: [[INC:%.*]] = cir.unary(inc, %3) nsw : !s32i, !s32i @@ -68,15 +78,17 @@ labelD: // LLVM: 3: // LLVM: [[LOAD:%.*]] = load i32, ptr [[COND:%.*]], align 4 // LLVM: [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0 -// LLVM: br i1 [[CMP]], label %6, label %9 +// LLVM: br i1 [[CMP]], label %6, label %10 // LLVM: 6: +// LLVM: br label %7 +// LLVM: 7: // LLVM: [[LOAD2:%.*]] = load i32, ptr [[COND]], align 4 // LLVM: [[ADD1:%.*]] = add nsw i32 [[LOAD2]], 1 // LLVM: store i32 [[ADD1]], ptr [[COND]], align 4 -// LLVM: br label %9 -// LLVM: 9: // LLVM: br label %10 // LLVM: 10: +// LLVM: br label %11 +// LLVM: 11: // LLVM: ret void // OGCG: define dso_local void @label_in_if @@ -142,11 +154,15 @@ end: return; } // CIR: cir.func no_proto dso_local @labelWithoutMatch +// CIR: cir.br ^bb1 +// CIR: ^bb1: // CIR: cir.label "end" // CIR: cir.return // CIR: } // LLVM: define dso_local void @labelWithoutMatch +// LLVM: br label %1 +// LLVM: 1: // LLVM: ret void // OGCG: define dso_local void @labelWithoutMatch @@ -167,13 +183,17 @@ void foo() { // CIR: cir.func no_proto dso_local @foo // CIR: cir.scope { -// CIR: cir.label "label" // CIR: %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["agg.tmp0"] +// CIR: cir.br ^bb1 +// CIR: ^bb1: +// CIR: cir.label "label" // LLVM:define dso_local void @foo() { // LLVM: [[ALLOC:%.*]] = alloca %struct.S, i64 1, align 1 // LLVM: br label %2 // LLVM:2: +// LLVM: br label %3 +// LLVM:3: // LLVM: [[CALL:%.*]] = call %struct.S @get() // LLVM: store %struct.S [[CALL]], ptr [[ALLOC]], align 1 // LLVM: [[LOAD:%.*]] = load %struct.S, ptr [[ALLOC]], align 1 diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c index 07f47d9..607e3e4 100644 --- a/clang/test/CodeGen/attr-target-mv.c +++ b/clang/test/CodeGen/attr-target-mv.c @@ -30,6 +30,7 @@ int __attribute__((target("arch=gracemont"))) foo(void) {return 24;} int __attribute__((target("arch=pantherlake"))) foo(void) {return 25;} int __attribute__((target("arch=clearwaterforest"))) foo(void) {return 26;} int __attribute__((target("arch=diamondrapids"))) foo(void) {return 27;} +int __attribute__((target("arch=wildcatlake"))) foo(void) {return 28;} int __attribute__((target("default"))) foo(void) { return 2; } int bar(void) { @@ -203,6 +204,8 @@ void calls_pr50025c(void) { pr50025c(); } // ITANIUM: ret i32 26 // ITANIUM: define{{.*}} i32 @foo.arch_diamondrapids() // ITANIUM: ret i32 27 +// ITANIUM: define{{.*}} i32 @foo.arch_wildcatlake() +// ITANIUM: ret i32 28 // ITANIUM: define{{.*}} i32 @foo() // ITANIUM: ret i32 2 // ITANIUM: define{{.*}} i32 @bar() @@ -262,6 +265,8 @@ void calls_pr50025c(void) { pr50025c(); } // WINDOWS: ret i32 26 // WINDOWS: define dso_local i32 @foo.arch_diamondrapids() // WINDOWS: ret i32 27 +// WINDOWS: define dso_local i32 @foo.arch_wildcatlake() +// WINDOWS: ret i32 28 // WINDOWS: define dso_local i32 @foo() // WINDOWS: ret i32 2 // WINDOWS: define dso_local i32 @bar() diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 120f1a5..2c0d83c 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -178,6 +178,7 @@ void verifycpustrings(void) { (void)__builtin_cpu_is("lunarlake"); (void)__builtin_cpu_is("clearwaterforest"); (void)__builtin_cpu_is("pantherlake"); + (void)__builtin_cpu_is("wildcatlake"); (void)__builtin_cpu_is("haswell"); (void)__builtin_cpu_is("icelake-client"); (void)__builtin_cpu_is("icelake-server"); diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl index 00b46ed..43ddd2e 100644 --- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl +++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl @@ -1,112 +1,64 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL -// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPV +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV // NOTE: SPIRV codegen for resource methods is not yet implemented StructuredBuffer<float> SB1 : register(t0); RWStructuredBuffer<float> RWSB1 : register(u0); -RWStructuredBuffer<uint4> RWSB2 : register(u1); +RWStructuredBuffer<float> RWSB2 : register(u1); AppendStructuredBuffer<float> ASB : register(u2); -ConsumeStructuredBuffer<double> CSB : register(u3); +ConsumeStructuredBuffer<float> CSB : register(u3); -// DXIL: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", float, 0, 0) } -// DXIL: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } -// DXIL: %"class.hlsl::RWStructuredBuffer.0" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) } -// DXIL: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } -// DXIL: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) } +// CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", float, 0, 0) } +// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } +// CHECK: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } +// CHECK: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } export int TestIncrementCounter() { return RWSB1.IncrementCounter(); } -// CHECK: define noundef i32 @TestIncrementCounter()() -// CHECK: call noundef i32 @hlsl::RWStructuredBuffer<float>::IncrementCounter()(ptr {{.*}} @RWSB1) -// CHECK: ret - -// CHECK: define {{.*}} noundef i32 @hlsl::RWStructuredBuffer<float>::IncrementCounter()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 0), ptr %__handle -// DXIL-NEXT: %[[COUNTER:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %[[HANDLE]], i8 1) -// CHECK-NEXT: ret i32 %[[COUNTER]] - +// CHECK: define noundef i32 @_Z20TestIncrementCounterv() +// CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1) +// CHECK-DXIL: ret i32 %[[INDEX]] export int TestDecrementCounter() { return RWSB2.DecrementCounter(); } -// CHECK: define {{.*}} i32 @TestDecrementCounter()() -// CHECK: call noundef i32 @hlsl::RWStructuredBuffer<unsigned int vector[4]>::DecrementCounter()(ptr {{.*}} @RWSB2) -// CHECK: ret -// CHECK: define {{.*}} noundef i32 @hlsl::RWStructuredBuffer<unsigned int vector[4]>::DecrementCounter()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWStructuredBuffer.0", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", <4 x i32>, 1, 0), ptr %__handle -// DXIL-NEXT: %[[COUNTER:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_v4i32_1_0t(target("dx.RawBuffer", <4 x i32>, 1, 0) %[[HANDLE]], i8 -1) -// CHECK-NEXT: ret i32 %[[COUNTER]] +// CHECK: define noundef i32 @_Z20TestDecrementCounterv() +// CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 -1) +// CHECK-DXIL: ret i32 %[[INDEX]] export void TestAppend(float value) { ASB.Append(value); } -// CHECK: define void @TestAppend(float)(float {{.*}} %value) -// CHECK: call void @hlsl::AppendStructuredBuffer<float>::Append(float)(ptr {{.*}} @ASB, float noundef nofpclass(nan inf) %0) -// CHECK: ret void - -// CHECK: define {{.*}} void @hlsl::AppendStructuredBuffer<float>::Append(float)(ptr {{.*}} %this, float noundef nofpclass(nan inf) %value) -// CHECK: %[[VALUE:.*]] = load float, ptr %value.addr -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::AppendStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 0), ptr %__handle -// CHECK-NEXT: %__handle2 = getelementptr inbounds nuw %"class.hlsl::AppendStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE2:.*]] = load target("dx.RawBuffer", float, 1, 0), ptr %__handle2 -// DXIL-NEXT: %[[COUNTER:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %[[HANDLE2]], i8 1) -// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %[[HANDLE]], i32 %[[COUNTER]]) -// CHECK-NEXT: store float %[[VALUE]], ptr %[[PTR]] -// CHECK-NEXT: ret void +// CHECK: define void @_Z10TestAppendf(float noundef nofpclass(nan inf) %value) +// CHECK-DXIL: %[[VALUE:.*]] = load float, ptr %value.addr, align 4 +// CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1) +// CHECK-DXIL: %[[RESPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i32 %[[INDEX]]) +// CHECK-DXIL: store float %[[VALUE]], ptr %[[RESPTR]], align 4 -export double TestConsume() { +export float TestConsume() { return CSB.Consume(); } -// CHECK: define {{.*}} double @TestConsume()() -// CHECK: call {{.*}} double @hlsl::ConsumeStructuredBuffer<double>::Consume()(ptr {{.*}} @CSB) -// CHECK: ret double - -// CHECK: define {{.*}} double @hlsl::ConsumeStructuredBuffer<double>::Consume()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::ConsumeStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", double, 1, 0), ptr %__handle -// CHECK-NEXT: %__handle2 = getelementptr inbounds nuw %"class.hlsl::ConsumeStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE2:.*]] = load target("dx.RawBuffer", double, 1, 0), ptr %__handle2 -// DXIL-NEXT: %[[COUNTER:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f64_1_0t(target("dx.RawBuffer", double, 1, 0) %[[HANDLE2]], i8 -1) -// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f64_1_0t(target("dx.RawBuffer", double, 1, 0) %[[HANDLE]], i32 %[[COUNTER]]) -// CHECK-NEXT: %[[VAL:.*]] = load double, ptr %[[PTR]], align 8 -// CHECK-NEXT: ret double %[[VAL]] + +// CHECK: define noundef nofpclass(nan inf) float @_Z11TestConsumev() +// CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %1, i8 -1) +// CHECK-DXIL: %[[RESPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %0, i32 %[[INDEX]]) +// CHECK-DXIL: %[[VALUE:.*]] = load float, ptr %[[RESPTR]], align 4 +// CHECK-DXIL: ret float %[[VALUE]] export float TestLoad() { return RWSB1.Load(1) + SB1.Load(2); } -// CHECK: define noundef nofpclass(nan inf) float @TestLoad()() -// CHECK: call {{.*}} float @hlsl::RWStructuredBuffer<float>::Load(unsigned int)(ptr {{.*}} @RWSB1, i32 noundef 1) -// CHECK: call {{.*}} float @hlsl::StructuredBuffer<float>::Load(unsigned int)(ptr {{.*}} @SB1, i32 noundef 2) -// CHECK: add -// CHECK: ret float - -// CHECK: define {{.*}} float @hlsl::RWStructuredBuffer<float>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 0), ptr %__handle -// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr -// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %[[HANDLE]], i32 %[[INDEX]]) -// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]] -// CHECK-NEXT: ret float %[[VAL]] - -// CHECK: define {{.*}} float @hlsl::StructuredBuffer<float>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::StructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 0, 0), ptr %__handle -// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr -// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0) %[[HANDLE]], i32 %[[INDEX]]) -// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]] -// CHECK-NEXT: ret float %[[VAL]] +// CHECK: define noundef nofpclass(nan inf) float @_Z8TestLoadv() +// CHECK: %[[PTR1:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i32 %{{[0-9]+}}) +// CHECK: %[[VALUE1:.*]] = load float, ptr %[[PTR1]] +// CHECK: %[[PTR2:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0) %{{[0-9]+}}, i32 %{{[0-9]+}}) +// CHECK: %[[VALUE2:.*]] = load float, ptr %[[PTR2]] -// DXIL: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8) -// DXIL: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_v4i32_1_0t(target("dx.RawBuffer", <4 x i32>, 1, 0), i8) -// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i32) -// DXIL: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f64_1_0t(target("dx.RawBuffer", double, 1, 0), i8) -// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f64_1_0t(target("dx.RawBuffer", double, 1, 0), i32) -// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0), i32) +// CHECK: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8) +// CHECK: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i32) +// CHECK: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0), i32) diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl index af312f9..9e08a6d 100644 --- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl +++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl @@ -1,71 +1,37 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-pixel -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL -// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-pixel -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPV +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV // NOTE: SPIRV codegen for resource methods is not yet implemented -RasterizerOrderedStructuredBuffer<float> ROSB1; -RasterizerOrderedStructuredBuffer<int2> ROSB2; +RWStructuredBuffer<float> RWSB1, RWSB2; +RasterizerOrderedStructuredBuffer<float> ROSB1, ROSB2; -// %"class.hlsl::RasterizerOrderedStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 1), target("dx.RawBuffer", float, 1, 1) } -// %"class.hlsl::RasterizerOrderedStructuredBuffer.0" = type { target("dx.RawBuffer", <2 x i32>, 1, 1), target("dx.RawBuffer", <2 x i32>, 1, 1) } - -// CHECK: @ROSB1 = internal global %"class.hlsl::RasterizerOrderedStructuredBuffer" poison -// CHECK: @ROSB2 = internal global %"class.hlsl::RasterizerOrderedStructuredBuffer.0" poison +// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } export void TestIncrementCounter() { +// CHECK: define void @_Z20TestIncrementCounterv() +// CHECK-DXIL: call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1) +// CHECK-DXIL: call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i8 1) + RWSB1.IncrementCounter(); ROSB1.IncrementCounter(); } -// CHECK: define void @TestIncrementCounter()() -// CHECK: call noundef i32 @hlsl::RasterizerOrderedStructuredBuffer<float>::IncrementCounter()(ptr {{.*}} @ROSB1) -// CHECK-NEXT: ret void - -// CHECK: define {{.*}} i32 @hlsl::RasterizerOrderedStructuredBuffer<float>::IncrementCounter()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 1), ptr %__handle -// DXIL-NEXT: %[[VAL:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %[[HANDLE]], i8 1) -// CHECK-NEXT: ret i32 %[[VAL]] - export void TestDecrementCounter() { +// CHECK: define void @_Z20TestDecrementCounterv() +// CHECK-DXIL: call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 -1) +// CHECK-DXIL: call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i8 -1) + RWSB2.DecrementCounter(); ROSB2.DecrementCounter(); } -// CHECK: define void @TestDecrementCounter()() -// CHECK: call noundef i32 @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::DecrementCounter()(ptr {{.*}} @ROSB2) -// CHECK-NEXT: ret void - -// CHECK: define {{.*}} i32 @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::DecrementCounter()(ptr {{.*}} %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer.0", ptr %{{.*}}, i32 0, i32 0 -// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", <2 x i32>, 1, 1), ptr %__handle -// DXIL-NEXT: %[[VAL:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_v2i32_1_1t(target("dx.RawBuffer", <2 x i32>, 1, 1) %[[HANDLE]], i8 -1) -// CHECK-NEXT: ret i32 %[[VAL]] - export float TestLoad() { - return ROSB1.Load(10).x + ROSB2.Load(20).x; + return ROSB1.Load(10); } -// CHECK: define {{.*}} float @TestLoad()() -// CHECK: call {{.*}} float @hlsl::RasterizerOrderedStructuredBuffer<float>::Load(unsigned int)(ptr {{.*}} @ROSB1, i32 noundef 10) -// CHECK: call {{.*}} <2 x i32> @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::Load(unsigned int)(ptr {{.*}} @ROSB2, i32 noundef 20) -// CHECK: ret - -// CHECK: define {{.*}} float @hlsl::RasterizerOrderedStructuredBuffer<float>::Load(unsigned int)(ptr {{.*}} %Index) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer", ptr {{.*}}, i32 0, i32 0 -// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 1), ptr %__handle -// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr -// DXIL-NEXT: %[[BUFPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %[[HANDLE]], i32 %[[INDEX]]) -// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[BUFPTR]] -// CHECK-NEXT: ret float %[[VAL]] - -// CHECK: define {{.*}} <2 x i32> @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::Load(unsigned int)(ptr {{.*}} %Index) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer.0", ptr {{.*}}, i32 0, i32 0 -// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", <2 x i32>, 1, 1), ptr %__handle -// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr -// DXIL-NEXT: %[[BUFPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_v2i32_1_1t(target("dx.RawBuffer", <2 x i32>, 1, 1) %[[HANDLE]], i32 %[[INDEX]]) -// CHECK-NEXT: %[[VAL:.*]] = load <2 x i32>, ptr %[[BUFPTR]] -// CHECK-NEXT: ret <2 x i32> %[[VAL]] +// CHECK: define noundef nofpclass(nan inf) float @_Z8TestLoadv() +// CHECK: %[[PTR1:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i32 %{{[0-9]+}}) +// CHECK: %[[VALUE1:.*]] = load float, ptr %[[PTR1]] -// DXIL: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1), i8) -// DXIL: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_v2i32_1_1t(target("dx.RawBuffer", <2 x i32>, 1, 1), i8) -// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1), i32) -// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_v2i32_1_1t(target("dx.RawBuffer", <2 x i32>, 1, 1), i32) +// CHECK: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8) +// CHECK: declare i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1), i8) +// CHECK: declare ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1), i32) diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c index 341f01c..24404ff 100644 --- a/clang/test/Driver/x86-march.c +++ b/clang/test/Driver/x86-march.c @@ -116,6 +116,10 @@ // RUN: | FileCheck %s -check-prefix=pantherlake // pantherlake: "-target-cpu" "pantherlake" // +// RUN: %clang --target=x86_64 -c -### %s -march=wildcatlake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=wildcatlake +// wildcatlake: "-target-cpu" "wildcatlake" +// // RUN: %clang --target=x86_64 -c -### %s -march=clearwaterforest 2>&1 \ // RUN: | FileCheck %s -check-prefix=clearwaterforest // clearwaterforest: "-target-cpu" "clearwaterforest" diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c index f89cdc2..3906318 100644 --- a/clang/test/Misc/target-invalid-cpu-note/x86.c +++ b/clang/test/Misc/target-invalid-cpu-note/x86.c @@ -63,6 +63,7 @@ // X86-SAME: {{^}}, lunarlake // X86-SAME: {{^}}, gracemont // X86-SAME: {{^}}, pantherlake +// X86-SAME: {{^}}, wildcatlake // X86-SAME: {{^}}, sierraforest // X86-SAME: {{^}}, grandridge // X86-SAME: {{^}}, graniterapids @@ -150,6 +151,7 @@ // X86_64-SAME: {{^}}, lunarlake // X86_64-SAME: {{^}}, gracemont // X86_64-SAME: {{^}}, pantherlake +// X86_64-SAME: {{^}}, wildcatlake // X86_64-SAME: {{^}}, sierraforest // X86_64-SAME: {{^}}, grandridge // X86_64-SAME: {{^}}, graniterapids @@ -246,6 +248,7 @@ // TUNE_X86-SAME: {{^}}, lunarlake // TUNE_X86-SAME: {{^}}, gracemont // TUNE_X86-SAME: {{^}}, pantherlake +// TUNE_X86-SAME: {{^}}, wildcatlake // TUNE_X86-SAME: {{^}}, sierraforest // TUNE_X86-SAME: {{^}}, grandridge // TUNE_X86-SAME: {{^}}, graniterapids @@ -349,6 +352,7 @@ // TUNE_X86_64-SAME: {{^}}, lunarlake // TUNE_X86_64-SAME: {{^}}, gracemont // TUNE_X86_64-SAME: {{^}}, pantherlake +// TUNE_X86_64-SAME: {{^}}, wildcatlake // TUNE_X86_64-SAME: {{^}}, sierraforest // TUNE_X86_64-SAME: {{^}}, grandridge // TUNE_X86_64-SAME: {{^}}, graniterapids diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 44089c4..e2f4bcb 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2526,6 +2526,9 @@ // RUN: %clang -march=pantherlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32 +// RUN: %clang -march=wildcatlake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_CWF_M32,CHECK_NKL_M32 @@ -2630,6 +2633,9 @@ // RUN: %clang -march=pantherlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64 +// RUN: %clang -march=wildcatlake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_CWF_M64,CHECK_NKL_M64 diff --git a/clang/test/Sema/attr-cpuspecific-cpus.c b/clang/test/Sema/attr-cpuspecific-cpus.c index 48543ac..0874d0c 100644 --- a/clang/test/Sema/attr-cpuspecific-cpus.c +++ b/clang/test/Sema/attr-cpuspecific-cpus.c @@ -87,3 +87,4 @@ ATTR(cpu_specific(lunarlake)) void CPU37(void){} ATTR(cpu_specific(gracemont)) void CPU38(void){} ATTR(cpu_specific(pantherlake)) void CPU39(void){} ATTR(cpu_specific(clearwaterforest)) void CPU40(void){} +ATTR(cpu_specific(wildcatlake)) void CPU41(void){} diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 9f8e833..5931b60 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -83,6 +83,8 @@ mark_as_advanced(COMPILER_RT_BUILD_ORC) option(COMPILER_RT_BUILD_GWP_ASAN "Build GWP-ASan, and link it into SCUDO" ON) mark_as_advanced(COMPILER_RT_BUILD_GWP_ASAN) option(COMPILER_RT_ENABLE_CET "Build Compiler RT with CET enabled" OFF) +option(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME "Build asan unit tests without depending upon a just-built asan runtime" OFF) +mark_as_advanced(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME) option(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH "Set custom sysroot for building SCUDO standalone" OFF) mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH) diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index 9cd9c97..6d88c96 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -170,11 +170,21 @@ function(add_asan_tests arch test_runtime) set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config) set(CONFIG_NAME_DYNAMIC ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) + # On some platforms, unit tests can be run against the runtime that shipped + # with the host compiler with COMPILER_RT_TEST_STANDALONE_BUILD_LIBS=OFF. + # COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME=ON removes the dependency + # on `asan`, allowing the tests to be run independently without + # a newly built asan runtime. + set(ASAN_UNIT_TEST_DEPS asan) + if(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME) + set(ASAN_UNIT_TEST_DEPS) + endif() + # Closure to keep the values. function(generate_asan_tests test_objects test_suite testname) generate_compiler_rt_tests(${test_objects} ${test_suite} ${testname} ${arch} COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_IGNORELIST_FILE} - DEPS asan + DEPS ${ASAN_UNIT_TEST_DEPS} KIND ${TEST_KIND} ${ARGN} ) @@ -215,7 +225,7 @@ function(add_asan_tests arch test_runtime) add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}" SUBDIR "${CONFIG_NAME_DYNAMIC}" OBJECTS ${ASAN_INST_TEST_OBJECTS} - DEPS asan ${ASAN_INST_TEST_OBJECTS} + DEPS ${ASAN_UNIT_TEST_DEPS} ${ASAN_INST_TEST_OBJECTS} LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS} ${DYNAMIC_LINK_FLAGS} ) endif() diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index a40675c..d91e13c 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -520,6 +520,13 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family, *Subtype = INTEL_COREI7_PANTHERLAKE; break; + // Wildcatlake: + case 0xd5: + CPU = "wildcatlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_PANTHERLAKE; + break; + // Icelake Xeon: case 0x6a: case 0x6c: diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 41383fb..9bf10b5 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -353,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) { // calculation op. mlir::Value baseRef = llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op) + .Case<fir::DeclareOp>([&](auto op) { + // If this declare binds a view with an underlying storage operand, + // treat that storage as the base reference. Otherwise, fall back + // to the declared memref. + if (auto storage = op.getStorage()) + return storage; + return mlir::Value(varPtr); + }) .Case<hlfir::DesignateOp>([&](auto op) { // Get the base object. return op.getMemref(); diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir new file mode 100644 index 0000000..fabfe4c --- /dev/null +++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir @@ -0,0 +1,24 @@ +// Use --mlir-disable-threading so that the diagnostic printing is serialized. +// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s + +module { + // Build a scalar view via fir.declare with a storage operand into an array of i8 + func.func @_QPdeclare_with_storage_is_nonscalar() { + %c0 = arith.constant 0 : index + %arr = fir.alloca !fir.array<4xi8> + %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8> + %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32> + %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"} + : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32> + // Force interface query through an acc op that prints type category + %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + acc.enter_data dataOperands(%cp : !fir.ref<f32>) + return + } + + // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + // CHECK: Pointer-like and Mappable: !fir.ref<f32> + // CHECK: Type category: array +} + + diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake index c6f00ed..23d9d49 100644 --- a/lldb/cmake/modules/LLDBFramework.cmake +++ b/lldb/cmake/modules/LLDBFramework.cmake @@ -68,8 +68,6 @@ if(NOT APPLE_EMBEDDED) ) endif() -find_program(unifdef_EXECUTABLE unifdef) - # Wrap output in a target, so lldb-framework can depend on it. add_custom_target(liblldb-resource-headers DEPENDS lldb-sbapi-dwarf-enums ${lldb_staged_headers}) set_target_properties(liblldb-resource-headers PROPERTIES FOLDER "LLDB/Resources") diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 640516a..9ca08d6 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -139,6 +139,8 @@ Changes to the Windows Target Changes to the X86 Backend -------------------------- +* `-mcpu=wildcatlake` is now supported. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index f06e7ce..bb7f3be 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -32,8 +32,11 @@ bool IsAnnotationOK(const GlobalVariable &GV); /// profile information and provides methods to operate on them. class StaticDataProfileInfo { public: - /// Accummulate the profile count of a constant that will be lowered to static - /// data sections. + /// A constant is tracked only if the following conditions are met. + /// 1) It has local (i.e., private or internal) linkage. + // 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}. + // 3) It's eligible for section prefix annotation. See `AnnotationKind` + // above for ineligible reasons. DenseMap<const Constant *, uint64_t> ConstantProfileCounts; /// Keeps track of the constants that are seen at least once without profile @@ -44,6 +47,22 @@ public: LLVM_ABI std::optional<uint64_t> getConstantProfileCount(const Constant *C) const; + /// Use signed enums for enum value comparison, and make 'LukewarmOrUnknown' + /// as 0 so any accidentally uninitialized value will default to unknown. + enum class StaticDataHotness : int8_t { + Cold = -1, + LukewarmOrUnknown = 0, + Hot = 1, + }; + + /// Return the hotness of the constant \p C based on its profile count \p + /// Count. + LLVM_ABI StaticDataHotness getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const; + + /// Return the string representation of the hotness enum \p Hotness. + LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const; + public: StaticDataProfileInfo() = default; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h index 596cc18..b0197f0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h @@ -13,7 +13,6 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H #define LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H -#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/Support/Compiler.h" @@ -54,9 +53,6 @@ inline size_t numDeallocActions(const AllocActions &AAs) { AAs, [](const AllocActionCallPair &P) { return !!P.Dealloc; }); } -using OnRunFinalizeActionsCompleteFn = - unique_function<void(Expected<std::vector<WrapperFunctionCall>>)>; - /// Run finalize actions. /// /// If any finalize action fails then the corresponding dealloc actions will be @@ -67,16 +63,13 @@ using OnRunFinalizeActionsCompleteFn = /// be returned. The dealloc actions should be run by calling /// runDeallocationActions. If this function succeeds then the AA argument will /// be cleared before the function returns. -LLVM_ABI void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete); - -using OnRunDeallocActionsComeleteFn = unique_function<void(Error)>; +LLVM_ABI Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs); /// Run deallocation actions. /// Dealloc actions will be run in reverse order (from last element of DAs to /// first). -LLVM_ABI void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs, - OnRunDeallocActionsComeleteFn OnComplete); +LLVM_ABI Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs); using SPSAllocActionCallPair = SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>; diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index e62aa6d..254587b 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -115,6 +115,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "meteorlake") X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_SAPPHIRERAPIDS, "emeraldrapids") X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ARROWLAKE_S,"lunarlake") X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont") +X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_PANTHERLAKE, "wildcatlake") #undef X86_CPU_SUBTYPE_ALIAS #undef X86_CPU_SUBTYPE diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h index f6aeaad..e4c43cd 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.h +++ b/llvm/include/llvm/TargetParser/X86TargetParser.h @@ -116,6 +116,7 @@ enum CPUKind { CK_ArrowlakeS, CK_Lunarlake, CK_Pantherlake, + CK_Wildcatlake, CK_Sierraforest, CK_Grandridge, CK_Graniterapids, diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index 1f751ee..e7f0b2c 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount( OriginalCount = getInstrMaxCountValue(); } +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { + // The accummulated counter shows the constant is hot. Return enum 'hot' + // whether this variable is seen by unprofiled functions or not. + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; + // The constant is not hot, and seen by unprofiled functions. We don't want to + // assign it to unlikely sections, even if the counter says 'cold'. So return + // enum 'LukewarmOrUnknown'. + if (ConstantWithoutCounts.count(C)) + return StaticDataHotness::LukewarmOrUnknown; + // The accummulated counter shows the constant is cold so return enum 'cold'. + if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; + + return StaticDataHotness::LukewarmOrUnknown; +} + +StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataHotness::Cold: + return "unlikely"; + case StaticDataHotness::Hot: + return "hot"; + default: + return ""; + } +} + std::optional<uint64_t> StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { auto I = ConstantProfileCounts.find(C); @@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { StringRef StaticDataProfileInfo::getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); + std::optional<uint64_t> Count = getConstantProfileCount(C); if (!Count) return ""; - // The accummulated counter shows the constant is hot. Return 'hot' whether - // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; - // The constant is not hot, and seen by unprofiled functions. We don't want to - // assign it to unlikely sections, even if the counter says 'cold'. So return - // an empty prefix before checking whether the counter is cold. - if (ConstantWithoutCounts.count(C)) - return ""; - // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) - return "unlikely"; - // The counter says lukewarm. Return an empty prefix. - return ""; + return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 380b192..1dd470b 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -329,10 +329,6 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { for (const auto &[Name, Info] : make_early_inc_range(ForwardRefVals)) { if (StringRef(Name).starts_with("llvm.")) { Intrinsic::ID IID = Intrinsic::lookupIntrinsicID(Name); - if (IID == Intrinsic::not_intrinsic) - // Don't do anything for unknown intrinsics. - continue; - // Automatically create declarations for intrinsics. Intrinsics can only // be called directly, so the call function type directly determines the // declaration function type. @@ -346,11 +342,26 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { return error(Info.second, "intrinsic can only be used as callee"); SmallVector<Type *> OverloadTys; - if (!Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(), - OverloadTys)) - return error(Info.second, "invalid intrinsic signature"); - - U.set(Intrinsic::getOrInsertDeclaration(M, IID, OverloadTys)); + if (IID != Intrinsic::not_intrinsic && + Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(), + OverloadTys)) { + U.set(Intrinsic::getOrInsertDeclaration(M, IID, OverloadTys)); + } else { + // Try to upgrade the intrinsic. + Function *TmpF = Function::Create(CB->getFunctionType(), + Function::ExternalLinkage, Name, M); + Function *NewF = nullptr; + if (!UpgradeIntrinsicFunction(TmpF, NewF)) { + if (IID == Intrinsic::not_intrinsic) + return error(Info.second, "unknown intrinsic '" + Name + "'"); + return error(Info.second, "invalid intrinsic signature"); + } + + U.set(TmpF); + UpgradeIntrinsicCall(CB, NewF); + if (TmpF->use_empty()) + TmpF->eraseFromParent(); + } } Info.first->eraseFromParent(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 787a81a..358e060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -658,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -18590,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18635,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. @@ -18645,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18742,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -29743,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29775,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29789,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29799,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29814,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29843,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. @@ -29862,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, true); +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, false); +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, false); } /// Return true if there is any possibility that the two addresses overlap. diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 5b3c05e..6c7e27e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -260,22 +260,17 @@ public: } // Run finalization actions. - using WrapperFunctionCall = orc::shared::WrapperFunctionCall; - runFinalizeActions( - G->allocActions(), - [this, OnFinalized = std::move(OnFinalized)]( - Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable { - completeFinalization(std::move(OnFinalized), - std::move(DeallocActions)); - }); - } + auto DeallocActions = runFinalizeActions(G->allocActions()); + if (!DeallocActions) { + OnFinalized(DeallocActions.takeError()); + return; + } - void abandon(OnAbandonedFunction OnAbandoned) override { - Error Err = Error::success(); - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); - if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); + // Release the finalize segments slab. + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { + OnFinalized(errorCodeToError(EC)); + return; + } #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -284,22 +279,17 @@ public: G = nullptr; #endif - OnAbandoned(std::move(Err)); + // Continue with finalized allocation. + OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), + std::move(*DeallocActions))); } -private: - void completeFinalization( - OnFinalizedFunction OnFinalized, - Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) { - - if (!DeallocActions) - return OnFinalized(DeallocActions.takeError()); - - // Release the finalize segments slab. - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { - OnFinalized(errorCodeToError(EC)); - return; - } + void abandon(OnAbandonedFunction OnAbandoned) override { + Error Err = Error::success(); + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); + if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -308,11 +298,10 @@ private: G = nullptr; #endif - // Continue with finalized allocation. - OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), - std::move(*DeallocActions))); + OnAbandoned(std::move(Err)); } +private: Error applyProtections() { for (auto &KV : BL.segments()) { const auto &AG = KV.first; diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 7b327af..7e606c6a 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size); } - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return OnInitialized(DeinitializeActionsOrErr.takeError()); - } + auto DeinitializeActions = shared::runFinalizeActions(AI.Actions); + if (!DeinitializeActions) + return OnInitialized(DeinitializeActions.takeError()); { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, // This is the maximum range whose permission have been possibly modified auto &Alloc = Allocations[MinAddr]; Alloc.Size = MaxAddr - MinAddr; - Alloc.DeinitializationActions = std::move(DeinitializeActions); + Alloc.DeinitializationActions = std::move(*DeinitializeActions); Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize( for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Reset protections to read/write so the area can be reused if (auto EC = sys::Memory::protectMappedMemory( diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp index 08ab0c6..91f2899 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp @@ -12,39 +12,31 @@ namespace llvm { namespace orc { namespace shared { -void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete) { +Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs) { std::vector<WrapperFunctionCall> DeallocActions; DeallocActions.reserve(numDeallocActions(AAs)); for (auto &AA : AAs) { if (AA.Finalize) - - if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) { - while (!DeallocActions.empty()) { - Err = joinErrors(std::move(Err), - DeallocActions.back().runWithSPSRetErrorMerged()); - DeallocActions.pop_back(); - } - return OnComplete(std::move(Err)); - } + if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) + return joinErrors(std::move(Err), runDeallocActions(DeallocActions)); if (AA.Dealloc) DeallocActions.push_back(std::move(AA.Dealloc)); } AAs.clear(); - OnComplete(std::move(DeallocActions)); + return DeallocActions; } -void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs, - OnRunDeallocActionsComeleteFn OnComplete) { +Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) { Error Err = Error::success(); while (!DAs.empty()) { Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged()); DAs = DAs.drop_back(); } - OnComplete(std::move(Err)); + return Err; } } // namespace shared diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 8c24b1f..4fbf232 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -9,10 +9,8 @@ #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h" #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Process.h" #include "llvm/Support/WindowsError.h" -#include <future> #include <sstream> #if defined(LLVM_ON_UNIX) @@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( } // Run finalization actions and get deinitlization action list. - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return DeinitializeActionsOrErr.takeError(); + auto DeinitializeActions = shared::runFinalizeActions(FR.Actions); + if (!DeinitializeActions) { + return DeinitializeActions.takeError(); } { std::lock_guard<std::mutex> Lock(Mutex); Allocations[MinAddr].DeinitializationActions = - std::move(DeinitializeActions); + std::move(*DeinitializeActions); Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize( std::lock_guard<std::mutex> Lock(Mutex); for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - if (Err) - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Remove the allocation from the allocation list of its reservation for (auto &Reservation : Reservations) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2..8bf0d11 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } return SDV; } + // Recognize build vector patterns to emit VSX vector instructions + // instead of loading value from memory. + if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG)) + return VecPat; } // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N, return SDValue(); } +// LXVKQ instruction load VSX vector with a special quadword value +// based on an immediate value. This helper method returns the details of the +// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount} +// to help generate the LXVKQ instruction and the subsequent shift instruction +// required to match the original build vector pattern. + +// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount} +using LXVKQPattern = std::tuple<uint32_t, uint8_t>; + +static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) { + + // LXVKQ instruction loads the Quadword value: + // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000 + static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64; + static const uint32_t Uim = 16; + + // Check for direct LXVKQ match (no shift needed) + if (FullVal == BasePattern) + return std::make_tuple(Uim, uint8_t{0}); + + // Check if FullValue is 1 (the result of the base pattern >> 127) + if (FullVal == APInt(128, 1)) + return std::make_tuple(Uim, uint8_t{127}); + + return std::nullopt; +} + +/// Combine vector loads to a single load (using lxvkq) or splat with shift of a +/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector. +/// LXVKQ instruction load VSX vector with a special quadword value based on an +/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value +/// 0x8000_0000_0000_0000_0000_0000_0000_0000. +/// This can be used to inline the build vector constants that have the +/// following patterns: +/// +/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) +/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a +/// combination of splatting and right shift instructions. + +SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op, + SelectionDAG &DAG) const { + + assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) && + "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue"); + + // This transformation is only supported if we are loading either a byte, + // halfword, word, or doubleword. + EVT VT = Op.getValueType(); + if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 || + VT == MVT::v2i64)) + return SDValue(); + + LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector (" + << VT.getEVTString() << "): "; + Op->dump()); + + unsigned NumElems = VT.getVectorNumElements(); + unsigned ElemBits = VT.getScalarSizeInBits(); + + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // Check for Non-constant operand in the build vector. + for (const SDValue &Operand : Op.getNode()->op_values()) { + if (!isa<ConstantSDNode>(Operand)) + return SDValue(); + } + + // Assemble build vector operands as a 128-bit register value + // We need to reconstruct what the 128-bit register pattern would be + // that produces this vector when interpreted with the current endianness + APInt FullVal = APInt::getZero(128); + + for (unsigned Index = 0; Index < NumElems; ++Index) { + auto *C = cast<ConstantSDNode>(Op.getOperand(Index)); + + // Get element value as raw bits (zero-extended) + uint64_t ElemValue = C->getZExtValue(); + + // Mask to element size to ensure we only get the relevant bits + if (ElemBits < 64) + ElemValue &= ((1ULL << ElemBits) - 1); + + // Calculate bit position for this element in the 128-bit register + unsigned BitPos = + (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits); + + // Create APInt for the element value and shift it to correct position + APInt ElemAPInt(128, ElemValue); + ElemAPInt <<= BitPos; + + // Place the element value at the correct bit position + FullVal |= ElemAPInt; + } + + if (FullVal.isZero() || FullVal.isAllOnes()) + return SDValue(); + + if (auto UIMOpt = getPatternInfo(FullVal)) { + const auto &[Uim, ShiftAmount] = *UIMOpt; + SDLoc Dl(Op); + + // Generate LXVKQ instruction if the shift amount is zero. + if (ShiftAmount == 0) { + SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32); + SDValue LxvkqInstr = + SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0); + LLVM_DEBUG(llvm::dbgs() + << "combineBVLoadsSpecialValue: Instruction Emitted "; + LxvkqInstr.dump()); + return LxvkqInstr; + } + + assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value"); + + // The right shifted pattern can be constructed using a combination of + // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower + // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate + // value 255. + SDValue ShiftAmountVec = + SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32, + DAG.getTargetConstant(255, Dl, MVT::i32)), + 0); + // Generate appropriate right shift instruction + SDValue ShiftVec = SDValue( + DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec), + 0); + LLVM_DEBUG(llvm::dbgs() + << "\n combineBVLoadsSpecialValue: Instruction Emitted "; + ShiftVec.dump()); + return ShiftVec; + } + // No patterns matched for build vectors. + return SDValue(); +} + /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 59f3387..880aca7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1472,6 +1472,9 @@ namespace llvm { combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBVLoadsSpecialValue(SDValue Operand, + SelectionDAG &DAG) const; + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the VINSERTH instruction introduced in ISA 3.0. This is /// essentially any shuffle of v8i16 vectors that just inserts one element diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 2384959..2d8c633 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> { 126>; } +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C), +// NOT(B), NAND(B,C)} +// - C is the "false" case op NOR(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectNor<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 129>; + + // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)),131>; + + // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)), + 133>; + + // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 134>; + + // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 138>; + + // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)), + 140>; + + // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 142>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv +// This class matches the equivalent Ternary Operation: A ? f(B,C) : EQV(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NAND(B,C), NOT(B), +// NOT(C)} +// - C is the "false" case op EQV(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectEqv<ValueType Vt>{ + // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 151>; + + // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 152>; + + // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 154>; + + // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 158>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C), +// B, NOT(B)} +// - C is the "false" case op NOT(C) +// ============================================================================= +multiclass XXEvalTernarySelectNotC<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>; + + // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>; + + // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>; + + // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>; + + // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>; + + // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(B) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C), +// C, NOT(B)} +// - C is the "false" case op NOT(B) +// ============================================================================= +multiclass XXEvalTernarySelectNotB<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>; + + // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>; + + // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>; + + // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>; + + // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>; + + // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NAND(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)} +// - C is the "false" case op NAND(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectNand<ValueType Vt>{ + // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>; + + // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>; + + // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 230>; + + // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 231>; + + // Pattern: (A ? EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 233>; +} + let Predicates = [PrefixInstrs, HasP10Vector] in { let AddedComplexity = 400 in { def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, @@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { defm : XXEvalTernarySelectC<Ty>; defm : XXEvalTernarySelectXor<Ty>; defm : XXEvalTernarySelectOr<Ty>; + defm : XXEvalTernarySelectNor<Ty>; + defm : XXEvalTernarySelectEqv<Ty>; + defm : XXEvalTernarySelectNotC<Ty>; + defm : XXEvalTernarySelectNotB<Ty>; + defm : XXEvalTernarySelectNand<Ty>; } // Anonymous patterns to select prefixed VSX loads and stores. diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 2bf016a..6db780f 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1338,7 +1338,6 @@ def ProcessorFeatures { list<SubtargetFeature> PTLFeatures = !listremove(ARLSFeatures, [FeatureWIDEKL]); - // Clearwaterforest list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, FeatureAVXVNNIINT16, @@ -1880,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel, } def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>; -def : ProcModel<"pantherlake", AlderlakePModel, +foreach P = ["pantherlake", "wildcatlake"] in { +def : ProcModel<P, AlderlakePModel, ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>; +} def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index a5bdc9d..928e779 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -964,6 +964,13 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, *Subtype = X86::INTEL_COREI7_PANTHERLAKE; break; + // Wildcatlake: + case 0xd5: + CPU = "wildcatlake"; + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_PANTHERLAKE; + break; + // Graniterapids: case 0xad: CPU = "graniterapids"; diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 1932a3a..e382cfe 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -378,6 +378,7 @@ constexpr ProcInfo Processors[] = { { {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false }, // Pantherlake microarchitecture based processors. { {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false }, + { {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false }, // Sierraforest microarchitecture based processors. { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Grandridge microarchitecture based processors. diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index d2f09e9..578fec7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2667,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); - V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); + // No profiling support for vector selects. + V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old, + DEBUG_TYPE, Name + "blend"); LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); return V; diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 609a23b..0976a10 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -40,50 +40,50 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x define void @vector_insert_extract_idxzero_128b() #1 { ; CHECK-VSCALE-1-LABEL: 'vector_insert_extract_idxzero_128b' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'vector_insert_extract_idxzero_128b' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_128b' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0) - %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0) - %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) - %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) - %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) - %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) - %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) - %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) - %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) + %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0) + %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0) + %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) + %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) + %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) + %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) + %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) + %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) + %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ret void } declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64) @@ -97,50 +97,50 @@ declare <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x define void @vector_insert_extract_idxzero_256b() #2 { ; CHECK-VSCALE-1-LABEL: 'vector_insert_extract_idxzero_256b' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'vector_insert_extract_idxzero_256b' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_256b' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0) - %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nx4f32(<vscale x 4 x float> undef, i64 0) - %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0) - %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0) - %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0) - %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0) - %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0) - %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0) - %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0) + %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0) + %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nx4f32(<vscale x 4 x float> poison, i64 0) + %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0) + %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0) + %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0) + %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0) + %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0) + %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0) + %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0) ret void } declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16>, <16 x i16>, i64) @@ -148,157 +148,157 @@ declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float>, i64 define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) { ; CHECK-VSCALE-1-LABEL: 'reductions' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'reductions' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'reductions' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) + %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison) %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) - %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) + %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison) %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) - %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) + %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison) %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) - %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) + %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison) %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) - %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) + %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison) %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) - %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) + %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison) %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) - %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) + %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison) %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) - %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) + %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison) %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) - %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) + %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison) %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) - %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef) + %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> poison) %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2) %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3) - %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) + %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison) %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) - %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) + %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison) %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) @@ -389,123 +389,123 @@ declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1) define void @vector_reverse() #0 { ; CHECK-VSCALE-1-LABEL: 'vector_reverse' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'vector_reverse' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'vector_reverse' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) - %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) - %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) - %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) - %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) - %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) - %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) - %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) - %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) - %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) - %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) - %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) - %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) - %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) - %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) - %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) - %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) - %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) - %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) - %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) - %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) - %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) - %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) - %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) - %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) - %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) - %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) + %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) + %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) + %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) + %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) + %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) + %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) + %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) + %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) + %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) + %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) + %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) + %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) + %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) + %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) + %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) + %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) + %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) + %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) + %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) + %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) + %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) + %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) + %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) + %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) + %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) + %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) + %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ret void } declare <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8>) @@ -912,158 +912,158 @@ declare <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double>, define void @get_lane_mask() #0 { ; CHECK-VSCALE-1-LABEL: 'get_lane_mask' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'get_lane_mask' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) - %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) - %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) - %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) + %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) + %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) + %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) + %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) - %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) - %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) - %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) - %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) + %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) + %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) + %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) + %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) - %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) - %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) + %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) + %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) - %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) - %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) - %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) - %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) + %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) + %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) + %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) + %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) - %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) - %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) - %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) - %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) + %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) + %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) + %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) + %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) - %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) - %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) + %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) + %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ret void } define void @fshr() #0 { ; CHECK-VSCALE-1-LABEL: 'fshr' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'fshr' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'fshr' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) - call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) - call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) - call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) + call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) + call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) + call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) + call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ret void } define void @fshl() #0 { ; CHECK-VSCALE-1-LABEL: 'fshl' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'fshl' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'fshl' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) - call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) - call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) - call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) + call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) + call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) + call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) + call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ret void } @@ -1362,48 +1362,48 @@ define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m define void @match() #3 { ; CHECK-VSCALE-1-LABEL: 'match' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'match' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'match' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) - %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) - %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) - %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) + %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) + %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) + %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) + %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) - %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) - %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) - %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) - %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) + %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) + %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) + %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) + %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ret void } diff --git a/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll b/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll index 377c002..49174d2 100644 --- a/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll +++ b/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll @@ -56,6 +56,45 @@ define void @remove_unanalyzable(ptr %p) { ret void } +define void @no_declaration() { +; CHECK-LABEL: define void @no_declaration() { +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1, addrspace(2) +; CHECK-NEXT: call void @llvm.lifetime.start.p2(ptr addrspace(2) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p2(ptr addrspace(2) [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i8, addrspace(2) + call void @llvm.lifetime.start.p2(i64 1, ptr addrspace(2) %a) + call void @llvm.lifetime.end.p2(i64 1, ptr addrspace(2) %a) + ret void +} + +define void @no_suffix1() { +; CHECK-LABEL: define void @no_suffix1() { +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1, addrspace(3) +; CHECK-NEXT: call void @llvm.lifetime.start.p3(ptr addrspace(3) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p3(ptr addrspace(3) [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i8, addrspace(3) + call void @llvm.lifetime.start(i64 1, ptr addrspace(3) %a) + call void @llvm.lifetime.end(i64 1, ptr addrspace(3) %a) + ret void +} + +define void @no_suffix2() { +; CHECK-LABEL: define void @no_suffix2() { +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1, addrspace(4) +; CHECK-NEXT: call void @llvm.lifetime.start.p4(ptr addrspace(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p4(ptr addrspace(4) [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i8, addrspace(4) + call void @llvm.lifetime.start(i64 1, ptr addrspace(4) %a) + call void @llvm.lifetime.end(i64 1, ptr addrspace(4) %a) + ret void +} + declare void @llvm.lifetime.start.p0(i64, ptr) declare void @llvm.lifetime.end.p0(i64, ptr) declare void @llvm.lifetime.start.p1(i64, ptr addrspace(1)) diff --git a/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll b/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll index 012fa1d..e54efa4 100644 --- a/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll +++ b/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll @@ -46,7 +46,10 @@ define <4 x float> @test_fms(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ret <4 x float> %res } -declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +; This declaration is intentionally omitted to check that intrinsic upgrade +; also works without a declaration. +; declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + declare <8 x i16> @llvm.wasm.dot.i8x16.i7x16.signed(<16 x i8>, <16 x i8>) declare <4 x i32> @llvm.wasm.dot.i8x16.i7x16.add.signed(<16 x i8>, <16 x i8>, <4 x i32>) declare <4 x float> @llvm.wasm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll index ad5a96a..4caee57 100644 --- a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll +++ b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll @@ -2,7 +2,7 @@ ; Use of unknown intrinsic without declaration should be rejected. -; CHECK: error: use of undefined value '@llvm.foobar' +; CHECK: error: unknown intrinsic 'llvm.foobar' define void @test() { call i8 @llvm.foobar(i8 0, i16 1) ret void diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll index bf31ccb..559cc53 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -32,6 +32,40 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v32i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v32i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i16: ; CHECK: # %bb.0: @@ -49,6 +83,40 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + +define void @select_v16i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i32: ; CHECK: # %bb.0: @@ -65,19 +133,70 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i32> %v0, <8 x i32> %v1 + store <8 x i32> %sel, ptr %res + ret void +} + +define void @select_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %v0, <8 x float> %v1 + store <8 x float> %sel, ptr %res + ret void +} + define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI10_0) ; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x i64>, ptr %a0 %v1 = load <4 x i64>, ptr %a1 - %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1 + %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v0, <4 x i64> %v1 store <4 x i64> %sel, ptr %res ret void } + +define void @select_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v0, <4 x double> %v1 + store <4 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll index 8f25a6b..25c4f09 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll @@ -16,6 +16,20 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ret void } +define void @select_v16i8_imm_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: select_v16i8_imm_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, -256 +; CHECK-NEXT: vbitseli.b $vr1, $vr0, 1 +; CHECK-NEXT: vst $vr1, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %v0 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i8: ; CHECK: # %bb.0: @@ -32,6 +46,40 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + +define void @select_v16i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i16: ; CHECK: # %bb.0: @@ -49,6 +97,40 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + +define void @select_v8i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI7_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI7_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i32: ; CHECK: # %bb.0: @@ -65,13 +147,47 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v4i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v0, <4 x i32> %v1 + store <4 x i32> %sel, ptr %res + ret void +} + +define void @select_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI10_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %sel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %v0, <4 x float> %v1 + store <4 x float> %sel, ptr %res + ret void +} + define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI11_0) ; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret @@ -81,3 +197,20 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { store <2 x i64> %sel, ptr %res ret void } + +define void @select_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %sel = select <2 x i1> <i1 false, i1 true>, <2 x double> %v0, <2 x double> %v1 + store <2 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index e71f59c..cad684e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_afn_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB11_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_afn_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB13_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB13_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_fast_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB15_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB15_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_fast_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB16_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB16_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ret float %rt diff --git a/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll new file mode 100644 index 0000000..0ee4524 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-LE-10 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-BE-10 + +; Test LXVKQ instruction generation for special vector constants matching 128 bit patterns: +; 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +; 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) + +; ============================================================================= +; v2i64 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-9223372036854775808, 0> +define dso_local noundef <2 x i64> @test_v2i64_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 -9223372036854775808, i64 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, -9223372036854775808> +define dso_local noundef <2 x i64> @test_v2i64_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI1_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 0, i64 -9223372036854775808> +} + +; ============================================================================= +; v4i32 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-2147483648, 0, 0, 0> +define dso_local noundef <4 x i32> @test_v4i32_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI2_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 -2147483648, i32 0, i32 0, i32 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, -2147483648> +define dso_local noundef <4 x i32> @test_v4i32_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI3_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 0, i32 0, i32 0, i32 -2147483648> +} + +; ============================================================================= +; v8i16 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-32768, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <8 x i16> @test_v8i16_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI4_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 -32768, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, -32768> +define dso_local noundef <8 x i16> @test_v8i16_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI5_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 -32768> +} + +; ============================================================================= +; v16i8 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <16 x i8> @test_v16i8_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI6_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 -128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128> +define dso_local noundef <16 x i8> @test_v16i8_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI7_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -128> +} + +; ============================================================================= +; v2i64 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 1> +define dso_local noundef <2 x i64> @test_v2i64_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI8_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 0, i64 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0> +define dso_local noundef <2 x i64> @test_v2i64_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI9_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 1, i64 0> +} + +; ============================================================================= +; v4i32 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 1> +define dso_local noundef <4 x i32> @test_v4i32_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI10_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 0, i32 0, i32 0, i32 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0> +define dso_local noundef <4 x i32> @test_v4i32_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI11_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 1, i32 0, i32 0, i32 0> +} + +; ============================================================================= +; v8i16 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 1> +define dso_local noundef <8 x i16> @test_v8i16_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI12_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <8 x i16> @test_v8i16_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI13_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +} + +; ============================================================================= +; v16i8 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1> +define dso_local noundef <16 x i8> @test_v16i8_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI14_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <16 x i8> @test_v16i8_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI15_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> +}
\ No newline at end of file diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll index 0892210..d506d20 100644 --- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll @@ -1566,12 +1566,16 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-LABEL: v16i8tov16i64_sign: ; PWR10BE: # %bb.0: # %entry ; PWR10BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha +; PWR10BE-NEXT: xxspltib v1, 255 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_0@toc@l +; PWR10BE-NEXT: vsrq v1, v1, v1 ; PWR10BE-NEXT: lxv v3, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha ; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l +; PWR10BE-NEXT: vperm v1, v2, v2, v1 ; PWR10BE-NEXT: lxv v4, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha +; PWR10BE-NEXT: vextsb2d v1, v1 ; PWR10BE-NEXT: vperm v3, v2, v2, v3 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l ; PWR10BE-NEXT: vextsb2d v3, v3 @@ -1585,23 +1589,18 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-NEXT: vperm v5, v2, v2, v5 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l ; PWR10BE-NEXT: vextsb2d v5, v5 -; PWR10BE-NEXT: lxv v1, 0(r3) +; PWR10BE-NEXT: lxv v6, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha ; PWR10BE-NEXT: vperm v0, v2, v2, v0 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l ; PWR10BE-NEXT: vextsb2d v0, v0 -; PWR10BE-NEXT: lxv v6, 0(r3) +; PWR10BE-NEXT: lxv v7, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha -; PWR10BE-NEXT: vperm v1, v2, v2, v1 +; PWR10BE-NEXT: vperm v6, v2, v2, v6 ; PWR10BE-NEXT: vaddudm v5, v0, v5 ; PWR10BE-NEXT: vaddudm v3, v4, v3 ; PWR10BE-NEXT: vaddudm v3, v3, v5 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l -; PWR10BE-NEXT: vextsb2d v1, v1 -; PWR10BE-NEXT: lxv v7, 0(r3) -; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha -; PWR10BE-NEXT: vperm v6, v2, v2, v6 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l ; PWR10BE-NEXT: vextsb2d v6, v6 ; PWR10BE-NEXT: lxv v8, 0(r3) ; PWR10BE-NEXT: vperm v7, v2, v2, v7 @@ -1609,7 +1608,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-NEXT: vperm v2, v2, v2, v8 ; PWR10BE-NEXT: vextsb2d v2, v2 ; PWR10BE-NEXT: vaddudm v2, v2, v7 -; PWR10BE-NEXT: vaddudm v4, v6, v1 +; PWR10BE-NEXT: vaddudm v4, v1, v6 ; PWR10BE-NEXT: vaddudm v2, v4, v2 ; PWR10BE-NEXT: vaddudm v2, v2, v3 ; PWR10BE-NEXT: xxswapd v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll index 24a1724..ba7680b 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_or_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -34,12 +32,10 @@ define <2 x i64> @ternary_A_or_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -54,11 +50,9 @@ define <16 x i8> @ternary_A_or_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -73,11 +67,9 @@ define <8 x i16> @ternary_A_or_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -92,11 +84,9 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -112,12 +102,10 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -133,11 +121,9 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -153,11 +139,9 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -173,10 +157,9 @@ define <4 x i32> @ternary_A_not_C_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, vs0, v3, 99 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -191,12 +174,10 @@ define <2 x i64> @ternary_A_not_C_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxleqv vs1, v4, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -211,11 +192,9 @@ define <16 x i8> @ternary_A_not_C_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxleqv vs1, v4, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -230,11 +209,9 @@ define <8 x i16> @ternary_A_not_C_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxleqv vs1, v4, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -249,11 +226,9 @@ define <4 x i32> @ternary_A_nand_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -269,12 +244,10 @@ define <2 x i64> @ternary_A_nand_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -290,11 +263,9 @@ define <16 x i8> @ternary_A_nand_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -310,11 +281,9 @@ define <8 x i16> @ternary_A_nand_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll index 7a6733d3..067b089 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll @@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_B_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_B_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -32,11 +31,10 @@ define <2 x i64> @ternary_A_B_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_B_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -50,10 +48,9 @@ define <16 x i8> @ternary_A_B_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -67,10 +64,9 @@ define <8 x i16> @ternary_A_B_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_B_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -84,10 +80,9 @@ define <4 x i32> @ternary_A_C_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_C_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -101,11 +96,10 @@ define <2 x i64> @ternary_A_C_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_C_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -119,10 +113,9 @@ define <16 x i8> @ternary_A_C_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -136,10 +129,9 @@ define <8 x i16> @ternary_A_C_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_C_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -153,11 +145,9 @@ define <4 x i32> @ternary_A_xor_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -172,12 +162,10 @@ define <2 x i64> @ternary_A_xor_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -192,11 +180,9 @@ define <16 x i8> @ternary_A_xor_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -211,11 +197,9 @@ define <8 x i16> @ternary_A_xor_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -230,11 +214,9 @@ define <4 x i32> @ternary_A_or_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_or_BC_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -249,12 +231,10 @@ define <2 x i64> @ternary_A_or_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_or_BC_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -269,11 +249,9 @@ define <16 x i8> @ternary_A_or_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_or_BC_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -288,11 +266,9 @@ define <8 x i16> @ternary_A_or_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_or_BC_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -307,11 +283,9 @@ define <4 x i32> @ternary_A_eqv_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -327,12 +301,10 @@ define <2 x i64> @ternary_A_eqv_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -348,11 +320,9 @@ define <16 x i8> @ternary_A_eqv_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -368,11 +338,9 @@ define <8 x i16> @ternary_A_eqv_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll index d635952..3695874 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -34,12 +32,10 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -54,11 +50,9 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -73,11 +67,9 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -92,10 +84,9 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_B_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -109,11 +100,10 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -127,10 +117,9 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -144,10 +133,9 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -161,10 +149,9 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_C_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -178,11 +165,10 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -196,10 +182,9 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -213,10 +198,9 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -230,11 +214,9 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -249,12 +231,10 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -269,11 +249,9 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -288,11 +266,9 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -307,11 +283,9 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -326,12 +300,10 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -346,11 +318,9 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -365,11 +335,9 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -384,11 +352,9 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -403,12 +369,10 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation @@ -423,11 +387,9 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -442,11 +404,9 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -461,11 +421,9 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -481,12 +439,10 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -502,11 +458,9 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -522,11 +476,9 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll index 6203a96..a67d9cf 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_and_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_and_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -52,11 +48,9 @@ define <16 x i8> @ternary_A_and_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -70,11 +64,9 @@ define <8 x i16> @ternary_A_and_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_and_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -88,11 +80,9 @@ define <4 x i32> @ternary_A_xor_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_xor_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -106,12 +96,10 @@ define <2 x i64> @ternary_A_xor_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_xor_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -125,11 +113,9 @@ define <16 x i8> @ternary_A_xor_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -143,11 +129,9 @@ define <8 x i16> @ternary_A_xor_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_xor_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -161,11 +145,9 @@ define <4 x i32> @ternary_A_or_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32 ; CHECK-LABEL: ternary_A_or_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -179,12 +161,10 @@ define <2 x i64> @ternary_A_or_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64 ; CHECK-LABEL: ternary_A_or_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -198,11 +178,9 @@ define <16 x i8> @ternary_A_or_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i ; CHECK-LABEL: ternary_A_or_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -216,11 +194,9 @@ define <8 x i16> @ternary_A_or_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16 ; CHECK-LABEL: ternary_A_or_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -234,11 +210,9 @@ define <4 x i32> @ternary_A_nand_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nand_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -253,12 +227,10 @@ define <2 x i64> @ternary_A_nand_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nand_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -273,11 +245,9 @@ define <16 x i8> @ternary_A_nand_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nand_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -292,11 +262,9 @@ define <8 x i16> @ternary_A_nand_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nand_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll index 3479d94..98c1f28 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_and_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_and_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -52,11 +48,9 @@ define <16 x i8> @ternary_A_and_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -70,11 +64,9 @@ define <8 x i16> @ternary_A_and_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_and_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -88,10 +80,9 @@ define <4 x i32> @ternary_A_B_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C ; CHECK-LABEL: ternary_A_B_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -104,11 +95,10 @@ define <2 x i64> @ternary_A_B_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C ; CHECK-LABEL: ternary_A_B_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -121,10 +111,9 @@ define <16 x i8> @ternary_A_B_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> % ; CHECK-LABEL: ternary_A_B_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -137,10 +126,9 @@ define <8 x i16> @ternary_A_B_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C ; CHECK-LABEL: ternary_A_B_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -153,11 +141,9 @@ define <4 x i32> @ternary_A_xor_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_xor_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -171,12 +157,10 @@ define <2 x i64> @ternary_A_xor_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_xor_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -190,11 +174,9 @@ define <16 x i8> @ternary_A_xor_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -208,11 +190,9 @@ define <8 x i16> @ternary_A_xor_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_xor_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -226,11 +206,9 @@ define <4 x i32> @ternary_A_or_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32 ; CHECK-LABEL: ternary_A_or_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -244,12 +222,10 @@ define <2 x i64> @ternary_A_or_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64 ; CHECK-LABEL: ternary_A_or_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -263,11 +239,9 @@ define <16 x i8> @ternary_A_or_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i ; CHECK-LABEL: ternary_A_or_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -281,11 +255,9 @@ define <8 x i16> @ternary_A_or_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16 ; CHECK-LABEL: ternary_A_or_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -299,11 +271,9 @@ define <4 x i32> @ternary_A_not_B_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32 ; CHECK-LABEL: ternary_A_not_B_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -317,12 +287,10 @@ define <2 x i64> @ternary_A_not_B_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64 ; CHECK-LABEL: ternary_A_not_B_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation @@ -336,11 +304,9 @@ define <16 x i8> @ternary_A_not_B_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i ; CHECK-LABEL: ternary_A_not_B_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -354,11 +320,9 @@ define <8 x i16> @ternary_A_not_B_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16 ; CHECK-LABEL: ternary_A_not_B_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -372,11 +336,9 @@ define <4 x i32> @ternary_A_nand_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nand_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -391,12 +353,10 @@ define <2 x i64> @ternary_A_nand_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nand_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -411,11 +371,9 @@ define <16 x i8> @ternary_A_nand_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nand_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -430,11 +388,9 @@ define <8 x i16> @ternary_A_nand_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nand_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/SPIRV/FCmpFalse.ll b/llvm/test/CodeGen/SPIRV/FCmpFalse.ll new file mode 100644 index 0000000..55d64196 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/FCmpFalse.ll @@ -0,0 +1,10 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#FalseVal:]] = OpConstantFalse %[[#]] +; CHECK: OpReturnValue %[[#FalseVal:]] + +define spir_func i1 @f(float %0) { + %2 = fcmp false float %0, %0 + ret i1 %2 +} diff --git a/llvm/test/CodeGen/SPIRV/FCmpFalse_Vec.ll b/llvm/test/CodeGen/SPIRV/FCmpFalse_Vec.ll new file mode 100644 index 0000000..c410b64 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/FCmpFalse_Vec.ll @@ -0,0 +1,13 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#BoolTy:]] = OpTypeBool +; CHECK: %[[#VecTy:]] = OpTypeVector %[[#BoolTy]] 4 +; CHECK: %[[#False:]] = OpConstantFalse %[[#BoolTy]] +; CHECK: %[[#Composite:]] = OpConstantComposite %[[#VecTy]] %[[#False]] %[[#False]] %[[#False]] %[[#False]] +; CHECK: OpReturnValue %[[#Composite]] + +define spir_func <4 x i1> @test(<4 x float> %a) { + %compare = fcmp false <4 x float> %a, %a + ret <4 x i1> %compare +} diff --git a/llvm/test/CodeGen/SPIRV/builtin_duplicate.ll b/llvm/test/CodeGen/SPIRV/builtin_duplicate.ll new file mode 100644 index 0000000..8786554 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/builtin_duplicate.ll @@ -0,0 +1,20 @@ +;; This test checks if we generate a single builtin variable for the following +;; LLVM IR. +;; @__spirv_BuiltInLocalInvocationId - A global variable +;; %3 = tail call i64 @_Z12get_local_idj(i32 0) - A function call + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpName %[[#]] "__spirv_BuiltInLocalInvocationId" +; CHECK-NOT: OpName %[[#]] "__spirv_BuiltInLocalInvocationId.1" + +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +declare spir_func i64 @_Z12get_local_idj(i32) local_unnamed_addr + +define spir_kernel void @test(i32 %a) { +entry: + %builtin_call = tail call i64 @_Z12get_local_idj(i32 0) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/complex-constexpr.ll b/llvm/test/CodeGen/SPIRV/complex-constexpr.ll new file mode 100644 index 0000000..e2c1d00 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/complex-constexpr.ll @@ -0,0 +1,21 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +@.str.1 = private unnamed_addr addrspace(1) constant [1 x i8] zeroinitializer, align 1 + +define linkonce_odr hidden spir_func void @test() { +entry: +; CHECK: %[[#MinusOne:]] = OpConstant %[[#]] 18446744073709551615 +; CHECK: %[[#Ptr:]] = OpConvertUToPtr %[[#]] %[[#MinusOne]] +; CHECK: %[[#PtrCast:]] = OpPtrCastToGeneric %[[#]] %[[#]] +; CHECK: %[[#]] = OpFunctionCall %[[#]] %[[#]] %[[#PtrCast]] %[[#Ptr]] + + %cast = bitcast ptr addrspace(4) inttoptr (i64 -1 to ptr addrspace(4)) to ptr addrspace(4) + call spir_func void @bar(ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), ptr addrspace(4) %cast) + ret void +} + +define linkonce_odr hidden spir_func void @bar(ptr addrspace(4) %begin, ptr addrspace(4) %end) { +entry: + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/dominator-order.ll b/llvm/test/CodeGen/SPIRV/dominator-order.ll new file mode 100644 index 0000000..2ecdddc --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/dominator-order.ll @@ -0,0 +1,25 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; This test checks that basic blocks are reordered in SPIR-V so that dominators +; are emitted ahead of their dominated blocks as required by the SPIR-V +; specification. + +; CHECK-DAG: OpName %[[#ENTRY:]] "entry" +; CHECK-DAG: OpName %[[#FOR_BODY137_LR_PH:]] "for.body137.lr.ph" +; CHECK-DAG: OpName %[[#FOR_BODY:]] "for.body" + +; CHECK: %[[#ENTRY]] = OpLabel +; CHECK: %[[#FOR_BODY]] = OpLabel +; CHECK: %[[#FOR_BODY137_LR_PH]] = OpLabel + +define spir_kernel void @test(ptr addrspace(1) %arg, i1 %cond) { +entry: + br label %for.body + +for.body137.lr.ph: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + br i1 %cond, label %for.body, label %for.body137.lr.ph +} diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fake_use.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fake_use.ll new file mode 100644 index 0000000..5370b51 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fake_use.ll @@ -0,0 +1,13 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Addresses +; CHECK-DAG: OpName %[[#]] "foo" + +declare void @llvm.fake.use(...) + +define spir_kernel void @foo(ptr addrspace(1) %a) { +entry: + call void (...) @llvm.fake.use(ptr addrspace(1) %a) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchange_cl20.ll b/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchange_cl20.ll new file mode 100644 index 0000000..8357373 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchange_cl20.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64v1.2-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-NOT: OpCapability Int64Atomics + +; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#int8:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#DeviceScope:]] = OpConstant %[[#int]] 1 +; CHECK-DAG: %[[#SequentiallyConsistent_MS:]] = OpConstant %[[#int]] 16 +; CHECK-DAG: %[[#int_ptr:]] = OpTypePointer Generic %[[#int]] +; CHECK-DAG: %[[#int_ptr8:]] = OpTypePointer Generic %[[#int8]] +; CHECK-DAG: %[[#bool:]] = OpTypeBool + +define spir_func void @test(ptr addrspace(4) %object, ptr addrspace(4) %expected, i32 %desired) { + +; CHECK: %[[#object:]] = OpFunctionParameter %[[#int_ptr8]] +; CHECK: %[[#expected:]] = OpFunctionParameter %[[#int_ptr8]] +; CHECK: %[[#desired:]] = OpFunctionParameter %[[#int]] + +entry: + %object.addr = alloca ptr addrspace(4), align 4 + %expected.addr = alloca ptr addrspace(4), align 4 + %desired.addr = alloca i32, align 4 + %strong_res = alloca i8, align 1 + %res = alloca i8, align 1 + %weak_res = alloca i8, align 1 + store ptr addrspace(4) %object, ptr %object.addr, align 4 + store ptr addrspace(4) %expected, ptr %expected.addr, align 4 + store i32 %desired, ptr %desired.addr, align 4 + %0 = load ptr addrspace(4), ptr %object.addr, align 4 + %1 = load ptr addrspace(4), ptr %expected.addr, align 4 + %2 = load i32, ptr %desired.addr, align 4 + +; CHECK-DAG: OpStore %[[#object_addr:]] %[[#object]] +; CHECK-DAG: OpStore %[[#expected_addr:]] %[[#expected]] +; CHECK-DAG: OpStore %[[#desired_addr:]] %[[#desired]] + +; CHECK: %[[#Pointer:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#exp:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#Value:]] = OpLoad %[[#int]] %[[#desired_addr]] +; CHECK: %[[#Comparator:]] = OpLoad %[[#int]] %[[#exp]] + +; CHECK: %[[#Result:]] = OpAtomicCompareExchange %[[#int]] %[[#]] %[[#DeviceScope]] %[[#SequentiallyConsistent_MS]] %[[#SequentiallyConsistent_MS]] %[[#Value]] %[[#Comparator]] + %call = call spir_func zeroext i1 @_Z30atomic_compare_exchange_strongPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4) %0, ptr addrspace(4) %1, i32 %2) + +; CHECK-NEXT: OpStore %[[#exp]] %[[#Result]] +; CHECK-NEXT: %[[#CallRes:]] = OpIEqual %[[#bool]] %[[#Result]] %[[#Comparator]] +; CHECK-NOT: %[[#Result]] + + %frombool = zext i1 %call to i8 + store i8 %frombool, ptr %strong_res, align 1 + %3 = load i8, ptr %strong_res, align 1 + %tobool = trunc i8 %3 to i1 + %lnot = xor i1 %tobool, true + %frombool1 = zext i1 %lnot to i8 + store i8 %frombool1, ptr %res, align 1 + %4 = load ptr addrspace(4), ptr %object.addr, align 4 + %5 = load ptr addrspace(4), ptr %expected.addr, align 4 + %6 = load i32, ptr %desired.addr, align 4 + +; CHECK: %[[#Pointer:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#exp:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#Value:]] = OpLoad %[[#int]] %[[#desired_addr]] +; CHECK: %[[#ComparatorWeak:]] = OpLoad %[[#int]] %[[#exp]] + +; CHECK: %[[#Result:]] = OpAtomicCompareExchangeWeak %[[#int]] %[[#]] %[[#DeviceScope]] %[[#SequentiallyConsistent_MS]] %[[#SequentiallyConsistent_MS]] %[[#Value]] %[[#ComparatorWeak]] + %call2 = call spir_func zeroext i1 @_Z28atomic_compare_exchange_weakPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4) %4, ptr addrspace(4) %5, i32 %6) + +; CHECK-NEXT: OpStore %[[#exp]] %[[#Result]] +; CHECK-NEXT: %[[#CallRes:]] = OpIEqual %[[#bool]] %[[#Result]] %[[#ComparatorWeak]] +; CHECK-NOT: %[[#Result]] + + %frombool3 = zext i1 %call2 to i8 + store i8 %frombool3, ptr %weak_res, align 1 + %7 = load i8, ptr %weak_res, align 1 + %tobool4 = trunc i8 %7 to i1 + %lnot5 = xor i1 %tobool4, true + %frombool6 = zext i1 %lnot5 to i8 + store i8 %frombool6, ptr %res, align 1 + ret void +} + +declare spir_func zeroext i1 @_Z30atomic_compare_exchange_strongPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4), ptr addrspace(4), i32) #1 +declare spir_func zeroext i1 @_Z28atomic_compare_exchange_weakPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4), ptr addrspace(4), i32) #1 diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll index 40c38c2..71253c8 100644 --- a/llvm/test/CodeGen/X86/cpus-intel.ll +++ b/llvm/test/CodeGen/X86/cpus-intel.ll @@ -38,6 +38,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -104,6 +105,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 42617c1..18588aa 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr @@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 3a88273..56a5663 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -2697,4 +2697,4 @@ for.body: ; preds = %for.body.lr.ph, %fo !9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} !10 = !{!"llvm.loop.vectorize.enable", i1 true} attributes #0 = { vscale_range(1,16) "target-features"="+sve" } -attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "cpu"="neoverse-v2" } +attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "target-cpu"="neoverse-v2" } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 3c2ae1c7..1e6bcb1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -410,20 +410,32 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <16 x i8> [[WIDE_LOAD5]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[WIDE_LOAD6]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]]) ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP5]]) -; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE8]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP10]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE9]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP7]]) +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE3]], [[PARTIAL_REDUCE]] -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[PARTIAL_REDUCE8]], [[BIN_RDX]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[PARTIAL_REDUCE9]], [[BIN_RDX10]] +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-INTERLEAVED-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK-INTERLEAVED: scalar.ph: ; @@ -432,25 +444,20 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] +; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1 -; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32> -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]]) -; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP1]]) +; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-MAXBW-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-MAXBW: middle.block: -; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]]) -; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH:%.*]] +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) +; CHECK-MAXBW-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK-MAXBW: scalar.ph: ; entry: @@ -693,20 +700,32 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP9]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <16 x i8> [[WIDE_LOAD5]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext <16 x i8> [[WIDE_LOAD6]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = sub <16 x i32> [[VEC_PHI]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = sub <16 x i32> [[VEC_PHI1]], [[TMP5]] -; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-INTERLEAVED-NEXT: [[TMP10]] = sub <16 x i32> [[VEC_PHI2]], [[TMP12]] +; CHECK-INTERLEAVED-NEXT: [[TMP11]] = sub <16 x i32> [[VEC_PHI3]], [[TMP14]] +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP7]], [[TMP6]] -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX7:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX8:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX7]] +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX8]]) ; CHECK-INTERLEAVED-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK-INTERLEAVED: scalar.ph: ; @@ -1093,9 +1112,124 @@ exit: ret i32 %add.lcssa } +define i64 @sext_reduction_i32_to_i64(ptr %arr, i64 %n) #1 { +; CHECK-INTERLEAVE1-LABEL: define i64 @sext_reduction_i32_to_i64( +; CHECK-INTERLEAVE1-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-INTERLEAVE1-NEXT: entry: +; CHECK-INTERLEAVE1-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2 +; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INTERLEAVE1: vector.ph: +; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 2 +; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INTERLEAVE1: vector.body: +; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] +; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-INTERLEAVE1-NEXT: [[TMP2]] = add <2 x i64> [[VEC_PHI]], [[TMP1]] +; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVE1: middle.block: +; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]]) +; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INTERLEAVE1: scalar.ph: +; +; CHECK-INTERLEAVED-LABEL: define i64 @sext_reduction_i32_to_i64( +; CHECK-INTERLEAVED-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-INTERLEAVED-NEXT: entry: +; CHECK-INTERLEAVED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 8 +; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INTERLEAVED: vector.ph: +; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 8 +; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INTERLEAVED: vector.body: +; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext <2 x i32> [[WIDE_LOAD5]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <2 x i32> [[WIDE_LOAD6]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[TMP14]] +; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI1]], [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI2]], [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP11]] = add <2 x i64> [[VEC_PHI3]], [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVED: middle.block: +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX7:%.*]] = add <2 x i64> [[TMP10]], [[BIN_RDX]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX8:%.*]] = add <2 x i64> [[TMP11]], [[BIN_RDX7]] +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX8]]) +; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INTERLEAVED: scalar.ph: +; +; CHECK-MAXBW-LABEL: define i64 @sext_reduction_i32_to_i64( +; CHECK-MAXBW-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-MAXBW-NEXT: entry: +; CHECK-MAXBW-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2 +; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-MAXBW: vector.ph: +; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 2 +; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-MAXBW: vector.body: +; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] +; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-MAXBW-NEXT: [[TMP2]] = add <2 x i64> [[VEC_PHI]], [[TMP1]] +; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-MAXBW-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-MAXBW: middle.block: +; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]]) +; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-MAXBW: scalar.ph: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %acc = phi i64 [ 0, %entry ], [ %add, %loop ] + %gep = getelementptr inbounds i32, ptr %arr, i64 %iv + %load = load i32, ptr %gep + %sext = sext i32 %load to i64 + %add = add i64 %acc, %sext + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i64 %add +} + !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} attributes #0 = { vscale_range(1,16) "target-features"="+sve" } -attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "cpu"="neoverse-v2" } +attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "target-cpu"="neoverse-v2" } attributes #2 = { "target-features"="+neon,+dotprod" } diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll index eabb697..3b77e49 100644 --- a/llvm/test/Transforms/SROA/slice-width.ll +++ b/llvm/test/Transforms/SROA/slice-width.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind ; This tests that allocas are not split into slices that are not byte width multiple +;. +; CHECK: @foo_copy_source = external constant %union.Foo +; CHECK: @i64_sink = global i64 0 +;. define void @no_split_on_non_byte_width(i32) { ; CHECK-LABEL: @no_split_on_non_byte_width( ; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8 @@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr) ; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte ; vector store, hence accidentally putting gibberish onto the stack. -define i32 @memcpy_vec3float_widening(ptr %x) { +define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 { ; CHECK-LABEL: @memcpy_vec3float_widening( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> -; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef +; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4 @@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() { %L2 = load i1, ptr %A ret i1 %L2 } +!0 = !{!"function_entry_count", i32 10} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"unknown", !"sroa"} +;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} ; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/unittests/ExecutionEngine/Orc/ReOptimizeLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ReOptimizeLayerTest.cpp index cd10ffe..f35a378 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ReOptimizeLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ReOptimizeLayerTest.cpp @@ -9,6 +9,7 @@ #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/SelfExecutorProcessControl.h" @@ -84,8 +85,11 @@ protected: ES = std::make_unique<ExecutionSession>(std::move(*EPC)); JD = &ES->createBareJITDylib("main"); + ObjLinkingLayer = std::make_unique<ObjectLinkingLayer>( - *ES, std::make_unique<InProcessMemoryManager>(*PageSize)); + *ES, std::make_unique<MapperJITLinkMemoryManager>( + 10 * 1024 * 1024, + std::make_unique<InProcessMemoryMapper>(*PageSize))); DL = std::make_unique<DataLayout>(std::move(*DLOrErr)); auto TM = JTMB->createTargetMachine(); diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index a5c5426..3f8be5e 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -1310,14 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll Transforms/SimpleLoopUnswitch/trivial-unswitch.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll -Transforms/SROA/phi-gep.ll -Transforms/SROA/scalable-vectors-with-known-vscale.ll -Transforms/SROA/select-gep.ll -Transforms/SROA/select-load.ll -Transforms/SROA/slice-width.ll -Transforms/SROA/vector-conversion.ll -Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll -Transforms/SROA/vector-promotion.ll Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll Transforms/StructurizeCFG/hoist-zerocost.ll diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 89fbeb7..ce9ff7e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -1655,6 +1655,40 @@ def NVVM_ConvertFloatToTF32Op : NVVM_Op<"convert.float.to.tf32"> { }]; } +def NVVM_ConvertF32x2ToF4x2Op : NVVM_Op<"convert.f32x2.to.f4x2"> { + let summary = "Convert a pair of float inputs to f4x2"; + let description = [{ + This Op converts each of the given float inputs to the specified fp4 type. + The result `dst` is returned as an i8 type where the converted values are + packed such that the value converted from `a` is stored in the upper 4 bits + of `dst` and the value converted from `b` is stored in the lower 4 bits of + `dst`. + The `relu` attribute, when set, lowers to the '.relu' variant of + the cvt instruction. + + [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt) + }]; + + let results = (outs I8:$dst); + let arguments = (ins F32:$a, F32:$b, + DefaultValuedAttr<BoolAttr, "false">:$relu, + TypeAttr:$dstTy); + let assemblyFormat = "$a `,` $b attr-dict `:` type($dst) `(` $dstTy `)`"; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(NVVM::ConvertF32x2ToF4x2Op op, + LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder); + }]; + + string llvmBuilder = [{ + auto [intId, args] = NVVM::ConvertF32x2ToF4x2Op::getIntrinsicIDAndArgs(op, moduleTranslation, builder); + llvm::Value *packedI16 = createIntrinsicCall(builder, intId, args); + $dst = builder.CreateTruncOrBitCast(packedI16, llvm::Type::getInt8Ty(builder.getContext())); + }]; +} + def NVVM_ConvertF32x2ToF6x2Op : NVVM_Op<"convert.f32x2.to.f6x2"> { let summary = "Convert a pair of float inputs to f6x2"; let description = [{ diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 5edcc40b..ab54183 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -309,6 +309,17 @@ LogicalResult ConvertBF16x2ToF8x2Op::verify() { return success(); } +LogicalResult ConvertF32x2ToF4x2Op::verify() { + mlir::MLIRContext *ctx = getContext(); + + if (!llvm::isa<mlir::Float4E2M1FNType>(getDstTy())) + return emitOpError("Only ") + << mlir::Float4E2M1FNType::get(ctx) + << " type is supported for conversions from f32x2 to f4x2."; + + return success(); +} + LogicalResult BulkStoreOp::verify() { if (getInitVal() != 0) return emitOpError("only 0 is supported for initVal, got ") << getInitVal(); @@ -2047,6 +2058,23 @@ ConvertFloatToTF32Op::getIntrinsicID(NVVM::FPRoundingMode rnd, } } +NVVM::IDArgPair +ConvertF32x2ToF4x2Op::getIntrinsicIDAndArgs(NVVM::ConvertF32x2ToF4x2Op op, + LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase &builder) { + llvm::SmallVector<llvm::Value *> args; + args.push_back(mt.lookupValue(op.getA())); + args.push_back(mt.lookupValue(op.getB())); + + bool hasRelu = op.getRelu(); + + llvm::Intrinsic::ID intId = + hasRelu ? llvm::Intrinsic::nvvm_ff_to_e2m1x2_rn_relu_satfinite + : llvm::Intrinsic::nvvm_ff_to_e2m1x2_rn_satfinite; + + return {intId, std::move(args)}; +} + #define GET_F32x2_TO_F6x2_ID(type, has_relu) \ has_relu ? llvm::Intrinsic::nvvm_ff_to_##type##_rn_relu_satfinite \ : llvm::Intrinsic::nvvm_ff_to_##type##_rn_satfinite diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index c477c6c..dcc1ef9 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -315,7 +315,8 @@ bool mlir::linalg::detail::isContractionBody( Value yielded = getSourceSkipUnary(terminator->getOperand(0)); Operation *reductionOp = yielded.getDefiningOp(); - if (reductionOp->getNumResults() != 1 || reductionOp->getNumOperands() != 2) { + if (!reductionOp || reductionOp->getNumResults() != 1 || + reductionOp->getNumOperands() != 2) { errs << "expected reduction op to be binary"; return false; } diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp index 4bbcd8e..db39c70 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp +++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp @@ -34,11 +34,9 @@ Location DebugImporter::translateFuncLocation(llvm::Function *func) { return UnknownLoc::get(context); // Add a fused location to link the subprogram information. - StringAttr funcName = StringAttr::get(context, subprogram->getName()); StringAttr fileName = StringAttr::get(context, subprogram->getFilename()); return FusedLocWith<DISubprogramAttr>::get( - {NameLoc::get(funcName), - FileLineColLoc::get(fileName, subprogram->getLine(), /*column=*/0)}, + {FileLineColLoc::get(fileName, subprogram->getLine(), /*column=*/0)}, translate(subprogram), context); } diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir index 2492ada..82426c4 100644 --- a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir +++ b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \ // RUN: -debug-only=serialize-to-isa 2> %t // RUN: FileCheck --input-file=%t %s +// REQUIRES: asserts // // MathToXeVM pass generates OpenCL intrinsics function calls when converting // Math ops with `fastmath` attr to native function calls. It is assumed that diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 618ba34..66cae5c 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } { } -> tensor<1x1x4xf32> return } + + func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) { + %0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d2, d1)>, + affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>) + outs(%arg2 : tensor<128x128xi32>) { + ^bb0(%in: i32, %in_0: i32, %out: i32): + linalg.yield %out : i32 + } -> tensor<128x128xi32> + return + } } // ----- diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll index e056e43..61376b8 100644 --- a/mlir/test/Target/LLVMIR/Import/debug-info.ll +++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll @@ -240,11 +240,10 @@ define void @subprogram() !dbg !3 { define void @func_loc() !dbg !3 { ret void } -; CHECK-DAG: #[[NAME_LOC:.+]] = loc("func_loc") ; CHECK-DAG: #[[FILE_LOC:.+]] = loc("debug-info.ll":42:0) ; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #{{.*}}, scope = #{{.*}}, name = "func_loc", file = #{{.*}}, line = 42, subprogramFlags = Definition> -; CHECK: loc(fused<#[[SP]]>[#[[NAME_LOC]], #[[FILE_LOC]]] +; CHECK: loc(fused<#[[SP]]>[#[[FILE_LOC]]] !llvm.dbg.cu = !{!1} !llvm.module.flags = !{!0} diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir new file mode 100644 index 0000000..04e2ddf --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir @@ -0,0 +1,12 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// CHECK-LABEL: @convert_f32x2_to_f4x2_e2m1 +llvm.func @convert_f32x2_to_f4x2_e2m1(%srcA : f32, %srcB : f32) { + // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.satfinite(float %{{.*}}, float %{{.*}}) + // CHECK-NEXT: %{{.*}} = trunc i16 %[[res1]] to i8 + %res1 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB : i8 (f4E2M1FN) + // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) + // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8 + %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB {relu = true} : i8 (f4E2M1FN) + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 0b36154..78e1e659 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -254,6 +254,14 @@ llvm.func @nvvm_cvt_f32x2_to_f6x2_invalid_type(%a : f32, %b : f32) { // ----- +llvm.func @nvvm_cvt_f32x2_to_f4x2_invalid_type(%a : f32, %b : f32) { + // expected-error @below {{Only 'f4E2M1FN' type is supported for conversions from f32x2 to f4x2.}} + %res = nvvm.convert.f32x2.to.f4x2 %a, %b : i8 (f8E4M3FN) + llvm.return +} + +// ----- + llvm.func @nvvm_prefetch_L1_with_evict_priority(%global_ptr: !llvm.ptr<1>) { // expected-error @below {{cache eviction priority supported only for cache level L2}} nvvm.prefetch level = L1, evict_priority = evict_last, %global_ptr : !llvm.ptr<1> diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel index b00e8f2..d8fcb53 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel @@ -1,4 +1,5 @@ load("//llvm:lit_test.bzl", "lit_test") +load("//llvm:targets.bzl", "llvm_targets") licenses(["notice"]) @@ -15,6 +16,9 @@ package(default_visibility = ["//visibility:public"]) ) for src in glob( include = ["**/*.mlir"], - exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"], + exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"] + ( + # MathToXeVM needs SPIRV; see MathToXeVM/lit.local.cfg + ["MathToXeVM/**"] if "SPIRV" not in llvm_targets else [] + ), ) ] |