diff options
author | Vitaly Buka <vitalybuka@google.com> | 2024-03-27 10:37:41 -0700 |
---|---|---|
committer | Vitaly Buka <vitalybuka@google.com> | 2024-03-27 10:37:41 -0700 |
commit | 3c80a2aa608d9068623cde891cf71cd4f7c19f16 (patch) | |
tree | 45b8bc261e29f078314b6c33064318e441f7f64a | |
parent | 45cb6b7a7f942f8e83ea5039ca0fcee1788346b7 (diff) | |
parent | 3403aee7a38ac979dbb5e57c779063535c605f6d (diff) | |
download | llvm-users/vitalybuka/spr/instrprofiling-do-not-sanitize-pgo-instrumentation.zip llvm-users/vitalybuka/spr/instrprofiling-do-not-sanitize-pgo-instrumentation.tar.gz llvm-users/vitalybuka/spr/instrprofiling-do-not-sanitize-pgo-instrumentation.tar.bz2 |
Created using spr 1.3.4
355 files changed, 8165 insertions, 4319 deletions
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index b8e8ab2..ff61cf8 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -36,7 +36,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # v2.1.2 + uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 with: results_file: results.sarif results_format: sarif diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0dd026a..0fdd9e3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -454,6 +454,7 @@ Bug Fixes to C++ Support - Fix a crash when instantiating a lambda that captures ``this`` outside of its context. Fixes (#GH85343). - Fix an issue where a namespace alias could be defined using a qualified name (all name components following the first `::` were ignored). +- Fix an out-of-bounds crash when checking the validity of template partial specializations. (part of #GH86757). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 66da1c7..8af99a0 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -849,10 +849,89 @@ Check for performance anti-patterns when using Grand Central Dispatch. .. _optin-performance-Padding: -optin.performance.Padding -""""""""""""""""""""""""" +optin.performance.Padding (C, C++, ObjC) +"""""""""""""""""""""""""""""""""""""""" Check for excessively padded structs. +This checker detects structs with excessive padding, which can lead to wasted +memory thus decreased performance by reducing the effectiveness of the +processor cache. Padding bytes are added by compilers to align data accesses +as some processors require data to be aligned to certain boundaries. On others, +unaligned data access are possible, but impose significantly larger latencies. 
+ +To avoid padding bytes, the fields of a struct should be ordered by decreasing +by alignment. Usually, its easier to think of the ``sizeof`` of the fields, and +ordering the fields by ``sizeof`` would usually also lead to the same optimal +layout. + +In rare cases, one can use the ``#pragma pack(1)`` directive to enforce a packed +layout too, but it can significantly increase the access times, so reordering the +fields is usually a better solution. + + +.. code-block:: cpp + + // warn: Excessive padding in 'struct NonOptimal' (35 padding bytes, where 3 is optimal) + struct NonOptimal { + char c1; + // 7 bytes of padding + std::int64_t big1; // 8 bytes + char c2; + // 7 bytes of padding + std::int64_t big2; // 8 bytes + char c3; + // 7 bytes of padding + std::int64_t big3; // 8 bytes + char c4; + // 7 bytes of padding + std::int64_t big4; // 8 bytes + char c5; + // 7 bytes of padding + }; + static_assert(sizeof(NonOptimal) == 4*8+5+5*7); + + // no-warning: The fields are nicely aligned to have the minimal amount of padding bytes. + struct Optimal { + std::int64_t big1; // 8 bytes + std::int64_t big2; // 8 bytes + std::int64_t big3; // 8 bytes + std::int64_t big4; // 8 bytes + char c1; + char c2; + char c3; + char c4; + char c5; + // 3 bytes of padding + }; + static_assert(sizeof(Optimal) == 4*8+5+3); + + // no-warning: Bit packing representation is also accepted by this checker, but + // it can significantly increase access times, so prefer reordering the fields. + #pragma pack(1) + struct BitPacked { + char c1; + std::int64_t big1; // 8 bytes + char c2; + std::int64_t big2; // 8 bytes + char c3; + std::int64_t big3; // 8 bytes + char c4; + std::int64_t big4; // 8 bytes + char c5; + }; + static_assert(sizeof(BitPacked) == 4*8+5); + +The ``AllowedPad`` option can be used to specify a threshold for the number +padding bytes raising the warning. 
If the number of padding bytes of the struct +and the optimal number of padding bytes differ by more than the threshold value, +a warning will be raised. + +By default, the ``AllowedPad`` threshold is 24 bytes. + +To override this threshold to e.g. 4 bytes, use the +``-analyzer-config optin.performance.Padding:AllowedPad=4`` option. + + .. _optin-portability-UnixAPI: optin.portability.UnixAPI diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 60db3cf..7a8bd98 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2991,6 +2991,7 @@ enum CXCallingConv { CXCallingConv_AArch64SVEPCS = 18, CXCallingConv_M68kRTD = 19, CXCallingConv_PreserveNone = 20, + CXCallingConv_RISCVVectorCall = 21, CXCallingConv_Invalid = 100, CXCallingConv_Unexposed = 200 diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 318d4e5..80e6075 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3011,6 +3011,13 @@ def PreserveNone : DeclOrTypeAttr, TargetSpecificAttr<TargetAnyX86> { let Documentation = [PreserveNoneDocs]; } +def RISCVVectorCC: DeclOrTypeAttr, TargetSpecificAttr<TargetRISCV> { + let Spellings = [CXX11<"riscv", "vector_cc">, + C23<"riscv", "vector_cc">, + Clang<"riscv_vector_cc">]; + let Documentation = [RISCVVectorCCDocs]; +} + def Target : InheritableAttr { let Spellings = [GCC<"target">]; let Args = [StringArgument<"featuresStr">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 384aebb..3ea4d67 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5494,6 +5494,17 @@ for clang builtin functions. }]; } +def RISCVVectorCCDocs : Documentation { + let Category = DocCatCallingConvs; + let Heading = "riscv::vector_cc, riscv_vector_cc, clang::riscv_vector_cc"; + let Content = [{ +The ``riscv_vector_cc`` attribute can be applied to a function. 
It preserves 15 +registers namely, v1-v7 and v24-v31 as callee-saved. Callers thus don't need +to save these registers before function calls, and callees only need to save +them if they use them. + }]; +} + def PreferredNameDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td index 27df731..e3263fe 100644 --- a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td +++ b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td @@ -18,6 +18,7 @@ def err_no_output_file: Error<"no output file specified">; def err_no_such_header_file : Error<"no such %select{public|private|project}1 header file: '%0'">; def warn_no_such_excluded_header_file : Warning<"no such excluded %select{public|private}0 header file: '%1'">, InGroup<InstallAPIViolation>; def warn_glob_did_not_match: Warning<"glob '%0' did not match any header file">, InGroup<InstallAPIViolation>; +def err_no_such_umbrella_header_file : Error<"%select{public|private|project}1 umbrella header file not found in input: '%0'">; } // end of command line category. let CategoryName = "Verification" in { diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 8586405..fb11e82 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -273,29 +273,30 @@ namespace clang { /// CallingConv - Specifies the calling convention that a function uses. 
enum CallingConv { - CC_C, // __attribute__((cdecl)) - CC_X86StdCall, // __attribute__((stdcall)) - CC_X86FastCall, // __attribute__((fastcall)) - CC_X86ThisCall, // __attribute__((thiscall)) - CC_X86VectorCall, // __attribute__((vectorcall)) - CC_X86Pascal, // __attribute__((pascal)) - CC_Win64, // __attribute__((ms_abi)) - CC_X86_64SysV, // __attribute__((sysv_abi)) - CC_X86RegCall, // __attribute__((regcall)) - CC_AAPCS, // __attribute__((pcs("aapcs"))) - CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) - CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) - CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels - CC_Swift, // __attribute__((swiftcall)) + CC_C, // __attribute__((cdecl)) + CC_X86StdCall, // __attribute__((stdcall)) + CC_X86FastCall, // __attribute__((fastcall)) + CC_X86ThisCall, // __attribute__((thiscall)) + CC_X86VectorCall, // __attribute__((vectorcall)) + CC_X86Pascal, // __attribute__((pascal)) + CC_Win64, // __attribute__((ms_abi)) + CC_X86_64SysV, // __attribute__((sysv_abi)) + CC_X86RegCall, // __attribute__((regcall)) + CC_AAPCS, // __attribute__((pcs("aapcs"))) + CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) + CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) + CC_SpirFunction, // default for OpenCL functions on SPIR target + CC_OpenCLKernel, // inferred for OpenCL kernels + CC_Swift, // __attribute__((swiftcall)) CC_SwiftAsync, // __attribute__((swiftasynccall)) - CC_PreserveMost, // __attribute__((preserve_most)) - CC_PreserveAll, // __attribute__((preserve_all)) + CC_PreserveMost, // __attribute__((preserve_most)) + CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) - CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) - CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) - CC_M68kRTD, // __attribute__((m68k_rtd)) - CC_PreserveNone, // __attribute__((preserve_none)) + CC_AArch64SVEPCS, // 
__attribute__((aarch64_sve_pcs)) + CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) + CC_M68kRTD, // __attribute__((m68k_rtd)) + CC_PreserveNone, // __attribute__((preserve_none)) + CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) }; /// Checks whether the given calling convention supports variadic diff --git a/clang/include/clang/InstallAPI/HeaderFile.h b/clang/include/clang/InstallAPI/HeaderFile.h index 235b4da..c67503d 100644 --- a/clang/include/clang/InstallAPI/HeaderFile.h +++ b/clang/include/clang/InstallAPI/HeaderFile.h @@ -24,8 +24,6 @@ namespace clang::installapi { enum class HeaderType { - /// Unset or unknown type. - Unknown, /// Represents declarations accessible to all clients. Public, /// Represents declarations accessible to a disclosed set of clients. @@ -33,6 +31,8 @@ enum class HeaderType { /// Represents declarations only accessible as implementation details to the /// input library. Project, + /// Unset or unknown type. + Unknown, }; inline StringRef getName(const HeaderType T) { @@ -62,6 +62,8 @@ class HeaderFile { bool Excluded{false}; /// Add header file to processing. bool Extra{false}; + /// Specify that header file is the umbrella header for library. 
+ bool Umbrella{false}; public: HeaderFile() = delete; @@ -79,17 +81,21 @@ public: void setExtra(bool V = true) { Extra = V; } void setExcluded(bool V = true) { Excluded = V; } + void setUmbrellaHeader(bool V = true) { Umbrella = V; } bool isExtra() const { return Extra; } bool isExcluded() const { return Excluded; } + bool isUmbrellaHeader() const { return Umbrella; } bool useIncludeName() const { return Type != HeaderType::Project && !IncludeName.empty(); } bool operator==(const HeaderFile &Other) const { - return std::tie(Type, FullPath, IncludeName, Language, Excluded, Extra) == - std::tie(Other.Type, Other.FullPath, Other.IncludeName, - Other.Language, Other.Excluded, Other.Extra); + return std::tie(Type, FullPath, IncludeName, Language, Excluded, Extra, + Umbrella) == std::tie(Other.Type, Other.FullPath, + Other.IncludeName, Other.Language, + Other.Excluded, Other.Extra, + Other.Umbrella); } }; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5ecd2f9..3a1abd4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2234,7 +2234,8 @@ private: bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); - void checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D); + void checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D, + const llvm::StringMap<bool> &FeatureMap); bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); bool CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index bf46766..5fe5c92 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -908,7 +908,7 @@ def PaddingChecker : Checker<"Padding">, "24", Released> ]>, - 
Documentation<NotDocumented>; + Documentation<HasDocumentation>; } // end: "padding" diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index f619d65..425f84e 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3445,6 +3445,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_PreserveAll: case CC_M68kRTD: case CC_PreserveNone: + case CC_RISCVVectorCall: // FIXME: we should be mangling all of the above. return ""; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index d2ffb23..8f3e26d 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3484,6 +3484,9 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { case CC_PreserveAll: return "preserve_all"; case CC_M68kRTD: return "m68k_rtd"; case CC_PreserveNone: return "preserve_none"; + // clang-format off + case CC_RISCVVectorCall: return "riscv_vector_cc"; + // clang-format on } llvm_unreachable("Invalid calling convention."); @@ -4074,6 +4077,7 @@ bool AttributedType::isCallingConv() const { case attr::PreserveAll: case attr::M68kRTD: case attr::PreserveNone: + case attr::RISCVVectorCC: return true; } llvm_unreachable("invalid attr kind"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index f176d04..0aa1d93 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1071,6 +1071,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_PreserveNone: OS << " __attribute__((preserve_none))"; break; + case CC_RISCVVectorCall: + OS << "__attribute__((riscv_vector_cc))"; + break; } } @@ -1960,6 +1963,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, case attr::PreserveNone: OS << "preserve_none"; break; + case attr::RISCVVectorCC: + OS << "riscv_vector_cc"; + break; case attr::NoDeref: OS << "noderef"; break; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 
a6d4af2..f3d705e 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -467,3 +467,14 @@ ParsedTargetAttr RISCVTargetInfo::parseTargetAttr(StringRef Features) const { } return Ret; } + +TargetInfo::CallingConvCheckResult +RISCVTargetInfo::checkCallingConvention(CallingConv CC) const { + switch (CC) { + default: + return CCCR_Warning; + case CC_C: + case CC_RISCVVectorCall: + return CCCR_OK; + } +} diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index bfbdafb..78580b5 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -110,6 +110,8 @@ public: bool hasBFloat16Type() const override { return true; } + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override; + bool useFP16ConversionIntrinsics() const override { return false; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3cfdb26..fdb517e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -792,7 +792,8 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { Intrinsic::ID inst = IsStart ? 
Intrinsic::vastart : Intrinsic::vaend; - return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); + return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}), + ArgValue); } /// Checks if using the result of __builtin_object_size(p, @p From) in place of @@ -3018,7 +3019,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_va_copy: { Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr}); + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}), + {DstPtr, SrcPtr}); return RValue::get(nullptr); } case Builtin::BIabs: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 475d96b..b8adf5c 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -74,6 +74,9 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_SwiftAsync: return llvm::CallingConv::SwiftTail; case CC_M68kRTD: return llvm::CallingConv::M68k_RTD; case CC_PreserveNone: return llvm::CallingConv::PreserveNone; + // clang-format off + case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall; + // clang-format on } } @@ -260,6 +263,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr<PreserveNoneAttr>()) return CC_PreserveNone; + if (D->hasAttr<RISCVVectorCCAttr>()) + return CC_RISCVVectorCall; + return CC_C; } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 0e20de2..2a385d8 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1452,6 +1452,8 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_M68kRTD; case CC_PreserveNone: return llvm::dwarf::DW_CC_LLVM_PreserveNone; + case CC_RISCVVectorCall: + return llvm::dwarf::DW_CC_LLVM_RISCVVectorCall; } return 0; 
} diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index b55f433..72393be 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2065,8 +2065,11 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { targetDiag(D->getLocation(), diag::note_defined_here, FD) << D; } - if (TI.hasRISCVVTypes() && Ty->isRVVSizelessBuiltinType()) - checkRVVTypeSupport(Ty, Loc, D); + if (TI.hasRISCVVTypes() && Ty->isRVVSizelessBuiltinType() && FD) { + llvm::StringMap<bool> CallerFeatureMap; + Context.getFunctionFeatureMap(CallerFeatureMap, FD); + checkRVVTypeSupport(Ty, Loc, D, CallerFeatureMap); + } // Don't allow SVE types in functions without a SVE target. if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) { diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 836c633..a312830 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -52,49 +52,54 @@ static void applyNullability(Sema &S, Decl *D, NullabilityKind Nullability, if (!Metadata.IsActive) return; - auto IsModified = [&](Decl *D, QualType QT, - NullabilityKind Nullability) -> bool { + auto GetModified = + [&](Decl *D, QualType QT, + NullabilityKind Nullability) -> std::optional<QualType> { QualType Original = QT; S.CheckImplicitNullabilityTypeSpecifier(QT, Nullability, D->getLocation(), isa<ParmVarDecl>(D), /*OverrideExisting=*/true); - return QT.getTypePtr() != Original.getTypePtr(); + return (QT.getTypePtr() != Original.getTypePtr()) ? 
std::optional(QT) + : std::nullopt; }; if (auto Function = dyn_cast<FunctionDecl>(D)) { - if (IsModified(D, Function->getReturnType(), Nullability)) { - QualType FnType = Function->getType(); - Function->setType(FnType); + if (auto Modified = + GetModified(D, Function->getReturnType(), Nullability)) { + const FunctionType *FnType = Function->getType()->castAs<FunctionType>(); + if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(FnType)) + Function->setType(S.Context.getFunctionType( + *Modified, proto->getParamTypes(), proto->getExtProtoInfo())); + else + Function->setType( + S.Context.getFunctionNoProtoType(*Modified, FnType->getExtInfo())); } } else if (auto Method = dyn_cast<ObjCMethodDecl>(D)) { - QualType Type = Method->getReturnType(); - if (IsModified(D, Type, Nullability)) { - Method->setReturnType(Type); + if (auto Modified = GetModified(D, Method->getReturnType(), Nullability)) { + Method->setReturnType(*Modified); // Make it a context-sensitive keyword if we can. - if (!isIndirectPointerType(Type)) + if (!isIndirectPointerType(*Modified)) Method->setObjCDeclQualifier(Decl::ObjCDeclQualifier( Method->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability)); } } else if (auto Value = dyn_cast<ValueDecl>(D)) { - QualType Type = Value->getType(); - if (IsModified(D, Type, Nullability)) { - Value->setType(Type); + if (auto Modified = GetModified(D, Value->getType(), Nullability)) { + Value->setType(*Modified); // Make it a context-sensitive keyword if we can. 
if (auto Parm = dyn_cast<ParmVarDecl>(D)) { - if (Parm->isObjCMethodParameter() && !isIndirectPointerType(Type)) + if (Parm->isObjCMethodParameter() && !isIndirectPointerType(*Modified)) Parm->setObjCDeclQualifier(Decl::ObjCDeclQualifier( Parm->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability)); } } } else if (auto Property = dyn_cast<ObjCPropertyDecl>(D)) { - QualType Type = Property->getType(); - if (IsModified(D, Type, Nullability)) { - Property->setType(Type, Property->getTypeSourceInfo()); + if (auto Modified = GetModified(D, Property->getType(), Nullability)) { + Property->setType(*Modified, Property->getTypeSourceInfo()); // Make it a property attribute if we can. - if (!isIndirectPointerType(Type)) + if (!isIndirectPointerType(*Modified)) Property->setPropertyAttributes( ObjCPropertyAttribute::kind_null_resettable); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 0844958..447e736 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5760,57 +5760,6 @@ static bool CheckInvalidVLENandLMUL(const TargetInfo &TI, CallExpr *TheCall, bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { - // CodeGenFunction can also detect this, but this gives a better error - // message. - bool FeatureMissing = false; - SmallVector<StringRef> ReqFeatures; - StringRef Features = Context.BuiltinInfo.getRequiredFeatures(BuiltinID); - Features.split(ReqFeatures, ',', -1, false); - - // Check if each required feature is included - for (StringRef F : ReqFeatures) { - SmallVector<StringRef> ReqOpFeatures; - F.split(ReqOpFeatures, '|'); - - if (llvm::none_of(ReqOpFeatures, - [&TI](StringRef OF) { return TI.hasFeature(OF); })) { - std::string FeatureStrs; - bool IsExtension = true; - for (StringRef OF : ReqOpFeatures) { - // If the feature is 64bit, alter the string so it will print better in - // the diagnostic. 
- if (OF == "64bit") { - assert(ReqOpFeatures.size() == 1 && "Expected '64bit' to be alone"); - OF = "RV64"; - IsExtension = false; - } - if (OF == "32bit") { - assert(ReqOpFeatures.size() == 1 && "Expected '32bit' to be alone"); - OF = "RV32"; - IsExtension = false; - } - - // Convert features like "zbr" and "experimental-zbr" to "Zbr". - OF.consume_front("experimental-"); - std::string FeatureStr = OF.str(); - FeatureStr[0] = std::toupper(FeatureStr[0]); - // Combine strings. - FeatureStrs += FeatureStrs.empty() ? "" : ", "; - FeatureStrs += "'"; - FeatureStrs += FeatureStr; - FeatureStrs += "'"; - } - // Error message - FeatureMissing = true; - Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension) - << IsExtension - << TheCall->getSourceRange() << StringRef(FeatureStrs); - } - } - - if (FeatureMissing) - return true; - // vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx, // vsmul.vv, vsmul.vx are not included for EEW=64 in Zve64*. switch (BuiltinID) { @@ -6714,36 +6663,35 @@ bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, return false; } -void Sema::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D) { - const TargetInfo &TI = Context.getTargetInfo(); - +void Sema::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D, + const llvm::StringMap<bool> &FeatureMap) { ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(Ty->castAs<BuiltinType>()); unsigned EltSize = Context.getTypeSize(Info.ElementType); unsigned MinElts = Info.EC.getKnownMinValue(); if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Double) && - !TI.hasFeature("zve64d")) + !FeatureMap.lookup("zve64d")) Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64d"; // (ELEN, LMUL) pairs of (8, mf8), (16, mf4), (32, mf2), (64, m1) requires at // least zve64x else if (((EltSize == 64 && Info.ElementType->isIntegerType()) || MinElts == 1) && - !TI.hasFeature("zve64x")) + 
!FeatureMap.lookup("zve64x")) Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64x"; - else if (Info.ElementType->isFloat16Type() && !TI.hasFeature("zvfh") && - !TI.hasFeature("zvfhmin")) + else if (Info.ElementType->isFloat16Type() && !FeatureMap.lookup("zvfh") && + !FeatureMap.lookup("zvfhmin")) Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zvfh or zvfhmin"; else if (Info.ElementType->isBFloat16Type() && - !TI.hasFeature("experimental-zvfbfmin")) + !FeatureMap.lookup("experimental-zvfbfmin")) Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zvfbfmin"; else if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Float) && - !TI.hasFeature("zve32f")) + !FeatureMap.lookup("zve32f")) Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32f"; // Given that caller already checked isRVVType() before calling this function, // if we don't have at least zve32x supported, then we need to emit error. - else if (!TI.hasFeature("zve32x")) + else if (!FeatureMap.lookup("zve32x")) Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32x"; } diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 1c546e9..b6c4d3d 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -1269,10 +1269,18 @@ substituteParameterMappings(Sema &S, NormalizedConstraint &N, : SourceLocation())); Atomic.ParameterMapping.emplace(TempArgs, OccurringIndices.count()); } + SourceLocation InstLocBegin = + ArgsAsWritten->arguments().empty() + ? ArgsAsWritten->getLAngleLoc() + : ArgsAsWritten->arguments().front().getSourceRange().getBegin(); + SourceLocation InstLocEnd = + ArgsAsWritten->arguments().empty() + ? 
ArgsAsWritten->getRAngleLoc() + : ArgsAsWritten->arguments().front().getSourceRange().getEnd(); Sema::InstantiatingTemplate Inst( - S, ArgsAsWritten->arguments().front().getSourceRange().getBegin(), + S, InstLocBegin, Sema::InstantiatingTemplate::ParameterMappingSubstitution{}, Concept, - ArgsAsWritten->arguments().front().getSourceRange()); + {InstLocBegin, InstLocEnd}); if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs)) return true; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 66aad25..8b44d24 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8962,8 +8962,13 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { } } - if (T->isRVVSizelessBuiltinType()) - checkRVVTypeSupport(T, NewVD->getLocation(), cast<Decl>(CurContext)); + if (T->isRVVSizelessBuiltinType() && isa<FunctionDecl>(CurContext)) { + const FunctionDecl *FD = cast<FunctionDecl>(CurContext); + llvm::StringMap<bool> CallerFeatureMap; + Context.getFunctionFeatureMap(CallerFeatureMap, FD); + checkRVVTypeSupport(T, NewVD->getLocation(), cast<Decl>(CurContext), + CallerFeatureMap); + } } /// Perform semantic checking on a newly-created variable diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 0a62c65..f25f3af 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5271,6 +5271,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_PreserveNone: D->addAttr(::new (S.Context) PreserveNoneAttr(S.Context, AL)); return; + case ParsedAttr::AT_RISCVVectorCC: + D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL)); + return; default: llvm_unreachable("unexpected attribute kind"); } @@ -5475,6 +5478,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_PreserveNone: CC = CC_PreserveNone; break; + case ParsedAttr::AT_RISCVVectorCC: + CC = CC_RISCVVectorCall; + break; 
default: llvm_unreachable("unexpected attribute kind"); } @@ -9637,6 +9643,7 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AMDGPUKernelCall: case ParsedAttr::AT_M68kRTD: case ParsedAttr::AT_PreserveNone: + case ParsedAttr::AT_RISCVVectorCC: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp index 4636d89..f9e1ad0 100644 --- a/clang/lib/Sema/SemaObjCProperty.cpp +++ b/clang/lib/Sema/SemaObjCProperty.cpp @@ -638,8 +638,6 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S, PDecl->setInvalidDecl(); } - ProcessDeclAttributes(S, PDecl, FD.D); - // Regardless of setter/getter attribute, we save the default getter/setter // selector names in anticipation of declaration of setter/getter methods. PDecl->setGetterName(GetterSel, GetterNameLoc); @@ -647,6 +645,8 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S, PDecl->setPropertyAttributesAsWritten( makePropertyAttributesAsWritten(AttributesAsWritten)); + ProcessDeclAttributes(S, PDecl, FD.D); + if (Attributes & ObjCPropertyAttribute::kind_readonly) PDecl->setPropertyAttributes(ObjCPropertyAttribute::kind_readonly); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index d7521a5..fd94caa 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -138,7 +138,8 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_PreserveMost: \ case ParsedAttr::AT_PreserveAll: \ case ParsedAttr::AT_M68kRTD: \ - case ParsedAttr::AT_PreserveNone + case ParsedAttr::AT_PreserveNone: \ + case ParsedAttr::AT_RISCVVectorCC // Function type attributes. 
#define FUNCTION_TYPE_ATTRS_CASELIST \ @@ -7939,6 +7940,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr<M68kRTDAttr>(Ctx, Attr); case ParsedAttr::AT_PreserveNone: return createSimpleAttr<PreserveNoneAttr>(Ctx, Attr); + case ParsedAttr::AT_RISCVVectorCC: + return createSimpleAttr<RISCVVectorCCAttr>(Ctx, Attr); } llvm_unreachable("unexpected attribute kind!"); } diff --git a/clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes b/clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes new file mode 100644 index 0000000..ccdc4e1 --- /dev/null +++ b/clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes @@ -0,0 +1,8 @@ +Name: SomeOtherKit +Classes: + - Name: A + Methods: + - Selector: "methodB" + MethodKind: Instance + Availability: none + AvailabilityMsg: "anything but this" diff --git a/clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes b/clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes new file mode 100644 index 0000000..cd5475b --- /dev/null +++ b/clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes @@ -0,0 +1,5 @@ +Name: SomeBrokenLib +Functions: + - Name: do_something_with_pointers + Nu llabilityOfRet: O + # the space is intentional, to make sure we don't crash on malformed API Notes diff --git a/clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h b/clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h new file mode 100644 index 0000000..b09c6f6 --- /dev/null +++ b/clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h @@ -0,0 +1,6 @@ +#ifndef SOME_BROKEN_LIB_H +#define SOME_BROKEN_LIB_H + +void do_something_with_pointers(int *ptr1, int *ptr2); + +#endif // SOME_BROKEN_LIB_H diff --git a/clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes b/clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes new file mode 100644 index 0000000..33eeaaa --- /dev/null +++ b/clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes @@ -0,0 +1,7 @@ +Name: SomeBrokenLib +Functions: 
+ - Name: do_something_with_pointers + NullabilityOfRet: O + - Name: do_something_with_pointers + NullabilityOfRet: O + diff --git a/clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h b/clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h new file mode 100644 index 0000000..b09c6f6 --- /dev/null +++ b/clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h @@ -0,0 +1,6 @@ +#ifndef SOME_BROKEN_LIB_H +#define SOME_BROKEN_LIB_H + +void do_something_with_pointers(int *ptr1, int *ptr2); + +#endif // SOME_BROKEN_LIB_H diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h new file mode 100644 index 0000000..523de4f --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h @@ -0,0 +1 @@ +extern int FrameworkWithActualPrivateModule; diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap new file mode 100644 index 0000000..859d723 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module FrameworkWithActualPrivateModule { + umbrella header "FrameworkWithActualPrivateModule.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap new file mode 100644 index 0000000..e7fafe3 --- /dev/null +++ 
b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap @@ -0,0 +1,5 @@ +framework module FrameworkWithActualPrivateModule_Private { + umbrella header "FrameworkWithActualPrivateModule_Private.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes new file mode 100644 index 0000000..831cf1e --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes @@ -0,0 +1 @@ +Name: FrameworkWithActualPrivateModule_Private diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h new file mode 100644 index 0000000..c07a3e9 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h @@ -0,0 +1,2 @@ +#include <FrameworkWithActualPrivateModule/FrameworkWithActualPrivateModule.h> +extern int FrameworkWithActualPrivateModule_Private; diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h new file mode 100644 index 0000000..4f3b631 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h @@ -0,0 +1 @@ +extern int FrameworkWithWrongCase; diff --git 
a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap new file mode 100644 index 0000000..e97d361 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module FrameworkWithWrongCase { + umbrella header "FrameworkWithWrongCase.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes new file mode 100644 index 0000000..ae5447c --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes @@ -0,0 +1 @@ +Name: FrameworkWithWrongCase diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h new file mode 100644 index 0000000..d3d6148 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h @@ -0,0 +1 @@ +extern int FrameworkWithWrongCasePrivate; diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap new file mode 100644 index 0000000..04b96ad --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module FrameworkWithWrongCasePrivate { + umbrella header "FrameworkWithWrongCasePrivate.h" + 
export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap new file mode 100644 index 0000000..d6ad53c --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap @@ -0,0 +1 @@ +module FrameworkWithWrongCasePrivate.Inner {} diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes new file mode 100644 index 0000000..d7af293 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes @@ -0,0 +1 @@ +Name: FrameworkWithWrongCasePrivate diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h new file mode 100644 index 0000000..a95d19e --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h @@ -0,0 +1,11 @@ +@import LayeredKitImpl; + +// @interface declarations already don't inherit attributes from forward +// declarations, so in order to test this properly we have to /not/ define +// UpwardClass anywhere. 
+ +// @interface UpwardClass +// @end + +@protocol UpwardProto +@end diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap new file mode 100644 index 0000000..04bbe72 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module LayeredKit { + umbrella header "LayeredKit.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes new file mode 100644 index 0000000..bece28c --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes @@ -0,0 +1,9 @@ +Name: LayeredKitImpl +Classes: +- Name: PerfectlyNormalClass + Availability: none +- Name: UpwardClass + Availability: none +Protocols: +- Name: UpwardProto + Availability: none diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h new file mode 100644 index 0000000..99591d3 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h @@ -0,0 +1,7 @@ +@protocol UpwardProto; +@class UpwardClass; + +@interface PerfectlyNormalClass +@end + +void doImplementationThings(UpwardClass *first, id <UpwardProto> second) __attribute((unavailable)); diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap new file mode 100644 index 0000000..58a6e55 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework 
module LayeredKitImpl { + umbrella header "LayeredKitImpl.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap new file mode 100644 index 0000000..2d07e76 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module SimpleKit { + umbrella header "SimpleKit.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes new file mode 100644 index 0000000..817af12 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes @@ -0,0 +1,74 @@ +Name: SomeKit +Classes: + - Name: A + Methods: + - Selector: "transform:" + MethodKind: Instance + Availability: none + AvailabilityMsg: "anything but this" + - Selector: "transform:integer:" + MethodKind: Instance + NullabilityOfRet: N + Nullability: [ N, S ] + Properties: + - Name: intValue + PropertyKind: Instance + Availability: none + AvailabilityMsg: "wouldn't work anyway" + - Name: nonnullAInstance + PropertyKind: Instance + Nullability: N + - Name: nonnullAClass + PropertyKind: Class + Nullability: N + - Name: nonnullABoth + Nullability: N + - Name: B + Availability: none + AvailabilityMsg: "just don't" + - Name: C + Methods: + - Selector: "initWithA:" + MethodKind: Instance + DesignatedInit: true + - Name: OverriddenTypes + Methods: + - Selector: "methodToMangle:second:" + MethodKind: Instance + ResultType: 'char *' + Parameters: + - Position: 0 + Type: 'SOMEKIT_DOUBLE *' + - Position: 1 + Type: 'float *' + Properties: + - Name: intPropertyToMangle + PropertyKind: Instance + Type: 'double *' +Functions: + - Name: global_int_fun + ResultType: 'char *' + Parameters: + - Position: 0 + Type: 
'double *' + - Position: 1 + Type: 'float *' +Globals: + - Name: global_int_ptr + Type: 'double *' +SwiftVersions: + - Version: 3.0 + Classes: + - Name: A + Methods: + - Selector: "transform:integer:" + MethodKind: Instance + NullabilityOfRet: O + Nullability: [ O, S ] + Properties: + - Name: explicitNonnullInstance + PropertyKind: Instance + Nullability: O + - Name: explicitNullableInstance + PropertyKind: Instance + Nullability: N diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes new file mode 100644 index 0000000..28ede9d --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes @@ -0,0 +1,15 @@ +Name: SomeKit +Classes: + - Name: A + Methods: + - Selector: "privateTransform:input:" + MethodKind: Instance + NullabilityOfRet: N + Nullability: [ N, S ] + Properties: + - Name: internalProperty + Nullability: N +Protocols: + - Name: InternalProtocol + Availability: none + AvailabilityMsg: "not for you" diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h new file mode 100644 index 0000000..bc0c5da --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h @@ -0,0 +1,55 @@ +#ifndef SOMEKIT_H +#define SOMEKIT_H + +#define ROOT_CLASS __attribute__((objc_root_class)) + +ROOT_CLASS +@interface A +-(A*)transform:(A*)input; +-(A*)transform:(A*)input integer:(int)integer; + +@property (nonatomic, readonly, retain) A* someA; +@property (nonatomic, retain) A* someOtherA; + +@property (nonatomic) int intValue; +@end + +@interface B : A +@end + +@interface C : A +- (instancetype)init; +- (instancetype)initWithA:(A*)a; +@end + + +@interface MyClass : A +- Inst; ++ Clas; +@end + +struct CGRect { 
+ float origin; + float size; +}; +typedef struct CGRect NSRect; + +@interface I +- (void) Meth : (NSRect[4])exposedRects; +- (void) Meth1 : (const I*)exposedRects; +- (void) Meth2 : (const I*)exposedRects; +- (void) Meth3 : (I*)exposedRects; +- (const I*) Meth4; +- (const I*) Meth5 : (int) Arg1 : (const I*)Arg2 : (double)Arg3 : (const I*) Arg4 :(const volatile id) Arg5; +- (volatile const I*) Meth6 : (const char *)Arg1 : (const char *)Arg2 : (double)Arg3 : (const I*) Arg4 :(const volatile id) Arg5; +@end + +@class NSURL, NSArray, NSError; +@interface INTF_BLOCKS + + (void)getNonLocalVersionsOfItemAtURL:(NSURL *)url completionHandler:(void (^)(NSArray *nonLocalFileVersions, NSError *error))completionHandler; + + (void *)getNonLocalVersionsOfItemAtURL2:(NSURL *)url completionHandler:(void (^)(NSArray *nonLocalFileVersions, NSError *error))completionHandler; + + (NSError **)getNonLocalVersionsOfItemAtURL3:(int)url completionHandler:(void (^)(NSArray *nonLocalFileVersions, NSError *error))completionHandler; + + (id)getNonLocalVersionsOfItemAtURL4:(NSURL *)url completionHandler:(void (^)(int nonLocalFileVersions, NSError *error, NSURL*))completionHandler; +@end + +#endif diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap new file mode 100644 index 0000000..3abee2d --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module SomeKit { + umbrella header "SomeKit.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap new file mode 100644 index 0000000..bbda9d0 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap @@ -0,0 +1,8 @@ +module 
SomeKit.Private { + header "SomeKit_Private.h" + export * + + explicit module NullAnnotation { + header "SomeKit_PrivateForNullAnnotation.h" + } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap new file mode 100644 index 0000000..e310343 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap @@ -0,0 +1,8 @@ +explicit framework module SomeKit.Private { + header "SomeKit_Private.h" + explicit NullAnnotation { header "SomeKit_PrivateForNullAnnotation.h" } + export * + module * { export * } +syntax error + +} diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h new file mode 100644 index 0000000..c761112 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h @@ -0,0 +1,16 @@ +#ifndef SOMEKIT_PRIVATE_H +#define SOMEKIT_PRIVATE_H + +#import <SomeKit/SomeKit.h> + +@interface A(Private) +-(A*)privateTransform:(A*)input; + +@property (nonatomic) A* internalProperty; +@end + +@protocol InternalProtocol +@end + +#endif + diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h new file mode 100644 index 0000000..bae4456 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h @@ -0,0 +1,17 @@ +#ifndef SOMEKIT_PRIVATE_H +#define SOMEKIT_PRIVATE_H + +#import <SomeKit/SomeKitForNullAnnotation.h> + +@interface A(Private) +-(A*)privateTransform:(A*)input; + +@property (nonatomic) A* internalProperty; +@end + +@protocol InternalProtocol +- (id) MomeMethod; 
+@end + +#endif + diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes new file mode 100644 index 0000000..28ede9d --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes @@ -0,0 +1,15 @@ +Name: SomeKit +Classes: + - Name: A + Methods: + - Selector: "privateTransform:input:" + MethodKind: Instance + NullabilityOfRet: N + Nullability: [ N, S ] + Properties: + - Name: internalProperty + Nullability: N +Protocols: + - Name: InternalProtocol + Availability: none + AvailabilityMsg: "not for you" diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes new file mode 100644 index 0000000..2ad546b --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes @@ -0,0 +1,8 @@ +Name: SomeOtherKit +Classes: + - Name: A + Methods: + - Selector: "methodA" + MethodKind: Instance + Availability: none + AvailabilityMsg: "anything but this" diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes new file mode 100644 index 0000000..2ad546b --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes @@ -0,0 +1,8 @@ +Name: SomeOtherKit +Classes: + - Name: A + Methods: + - Selector: "methodA" + MethodKind: Instance + Availability: none + AvailabilityMsg: "anything but this" diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h new file mode 100644 index 0000000..3911d76 --- 
/dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h @@ -0,0 +1,9 @@ +#ifndef SOME_OTHER_KIT_H + +__attribute__((objc_root_class)) +@interface A +-(void)methodA; +-(void)methodB; +@end + +#endif diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap new file mode 100644 index 0000000..0aaad92 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module SomeOtherKit { + umbrella header "SomeOtherKit.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h new file mode 100644 index 0000000..d3376f1 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h @@ -0,0 +1 @@ +extern int TopLevelPrivateKit_Public; diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes new file mode 100644 index 0000000..ece1dd2 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes @@ -0,0 +1 @@ +garbage here because this file shouldn't get read
\ No newline at end of file diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap new file mode 100644 index 0000000..70faa54 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module TopLevelPrivateKit { + umbrella header "TopLevelPrivateKit.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap new file mode 100644 index 0000000..0958a14 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap @@ -0,0 +1,5 @@ +framework module TopLevelPrivateKit_Private { + umbrella header "TopLevelPrivateKit_Private.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes new file mode 100644 index 0000000..908dae0 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes @@ -0,0 +1 @@ +garbage here because this file shouldn't get read diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes new file mode 100644 index 0000000..4332362 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes @@ -0,0 +1,4 @@ +Name: 
TopLevelPrivateKit_Private +Globals: +- Name: TopLevelPrivateKit_Private + Type: float diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h new file mode 100644 index 0000000..39cbfe6 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h @@ -0,0 +1 @@ +extern int TopLevelPrivateKit_Private; diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes new file mode 100644 index 0000000..ece1dd2 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes @@ -0,0 +1 @@ +garbage here because this file shouldn't get read
\ No newline at end of file diff --git a/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes new file mode 100644 index 0000000..572c714 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes @@ -0,0 +1,156 @@ +Name: VersionedKit +Classes: + - Name: TestProperties + SwiftObjCMembers: true + Properties: + - Name: accessorsOnly + PropertyKind: Instance + SwiftImportAsAccessors: true + - Name: accessorsOnlyForClass + PropertyKind: Class + SwiftImportAsAccessors: true + - Name: accessorsOnlyExceptInVersion3 + PropertyKind: Instance + SwiftImportAsAccessors: true + - Name: accessorsOnlyForClassExceptInVersion3 + PropertyKind: Class + SwiftImportAsAccessors: true +Functions: + - Name: unversionedRenameDUMP + SwiftName: 'unversionedRename_NOTES()' +Tags: + - Name: APINotedFlagEnum + FlagEnum: true + - Name: APINotedOpenEnum + EnumExtensibility: open + - Name: APINotedClosedEnum + EnumExtensibility: closed + - Name: SoonToBeCFEnum + EnumKind: CFEnum + - Name: SoonToBeNSEnum + EnumKind: NSEnum + - Name: SoonToBeCFOptions + EnumKind: CFOptions + - Name: SoonToBeNSOptions + EnumKind: NSOptions + - Name: SoonToBeCFClosedEnum + EnumKind: CFClosedEnum + - Name: SoonToBeNSClosedEnum + EnumKind: NSClosedEnum + - Name: UndoAllThatHasBeenDoneToMe + EnumKind: none +Typedefs: + - Name: MultiVersionedTypedef34Notes + SwiftName: MultiVersionedTypedef34Notes_NEW + - Name: MultiVersionedTypedef345Notes + SwiftName: MultiVersionedTypedef345Notes_NEW + - Name: MultiVersionedTypedef4Notes + SwiftName: MultiVersionedTypedef4Notes_NEW + - Name: MultiVersionedTypedef45Notes + SwiftName: MultiVersionedTypedef45Notes_NEW +SwiftVersions: + - Version: 3.0 + Classes: + - Name: MyReferenceType + SwiftBridge: '' + - Name: TestGenericDUMP + SwiftImportAsNonGeneric: true + - Name: TestProperties + SwiftObjCMembers: 
false + Properties: + - Name: accessorsOnlyInVersion3 + PropertyKind: Instance + SwiftImportAsAccessors: true + - Name: accessorsOnlyForClassInVersion3 + PropertyKind: Class + SwiftImportAsAccessors: true + - Name: accessorsOnlyExceptInVersion3 + PropertyKind: Instance + SwiftImportAsAccessors: false + - Name: accessorsOnlyForClassExceptInVersion3 + PropertyKind: Class + SwiftImportAsAccessors: false + - Name: Swift3RenamedOnlyDUMP + SwiftName: SpecialSwift3Name + - Name: Swift3RenamedAlsoDUMP + SwiftName: SpecialSwift3Also + Functions: + - Name: moveToPointDUMP + SwiftName: 'moveTo(a:b:)' + - Name: acceptClosure + Parameters: + - Position: 0 + NoEscape: false + - Name: privateFunc + SwiftPrivate: false + Tags: + - Name: MyErrorCode + NSErrorDomain: '' + - Name: NewlyFlagEnum + FlagEnum: false + - Name: OpenToClosedEnum + EnumExtensibility: open + - Name: ClosedToOpenEnum + EnumExtensibility: closed + - Name: NewlyClosedEnum + EnumExtensibility: none + - Name: NewlyOpenEnum + EnumExtensibility: none + Typedefs: + - Name: MyDoubleWrapper + SwiftWrapper: none + - Name: MultiVersionedTypedef34 + SwiftName: MultiVersionedTypedef34_3 + - Name: MultiVersionedTypedef34Header + SwiftName: MultiVersionedTypedef34Header_3 + - Name: MultiVersionedTypedef34Notes + SwiftName: MultiVersionedTypedef34Notes_3 + - Name: MultiVersionedTypedef345 + SwiftName: MultiVersionedTypedef345_3 + - Name: MultiVersionedTypedef345Header + SwiftName: MultiVersionedTypedef345Header_3 + - Name: MultiVersionedTypedef345Notes + SwiftName: MultiVersionedTypedef345Notes_3 + - Version: 5 + Typedefs: + - Name: MultiVersionedTypedef345 + SwiftName: MultiVersionedTypedef345_5 + - Name: MultiVersionedTypedef345Header + SwiftName: MultiVersionedTypedef345Header_5 + - Name: MultiVersionedTypedef345Notes + SwiftName: MultiVersionedTypedef345Notes_5 + - Name: MultiVersionedTypedef45 + SwiftName: MultiVersionedTypedef45_5 + - Name: MultiVersionedTypedef45Header + SwiftName: MultiVersionedTypedef45Header_5 + - 
Name: MultiVersionedTypedef45Notes + SwiftName: MultiVersionedTypedef45Notes_5 + - Version: 4 # Versions are deliberately ordered as "3, 5, 4" to catch bugs. + Classes: + - Name: Swift4RenamedDUMP + SwiftName: SpecialSwift4Name + Typedefs: + - Name: MultiVersionedTypedef34 + SwiftName: MultiVersionedTypedef34_4 + - Name: MultiVersionedTypedef34Header + SwiftName: MultiVersionedTypedef34Header_4 + - Name: MultiVersionedTypedef34Notes + SwiftName: MultiVersionedTypedef34Notes_4 + - Name: MultiVersionedTypedef345 + SwiftName: MultiVersionedTypedef345_4 + - Name: MultiVersionedTypedef345Header + SwiftName: MultiVersionedTypedef345Header_4 + - Name: MultiVersionedTypedef345Notes + SwiftName: MultiVersionedTypedef345Notes_4 + - Name: MultiVersionedTypedef4 + SwiftName: MultiVersionedTypedef4_4 + - Name: MultiVersionedTypedef4Header + SwiftName: MultiVersionedTypedef4Header_4 + - Name: MultiVersionedTypedef4Notes + SwiftName: MultiVersionedTypedef4Notes_4 + - Name: MultiVersionedTypedef45 + SwiftName: MultiVersionedTypedef45_4 + - Name: MultiVersionedTypedef45Header + SwiftName: MultiVersionedTypedef45Header_4 + - Name: MultiVersionedTypedef45Notes + SwiftName: MultiVersionedTypedef45Notes_4 diff --git a/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h new file mode 100644 index 0000000..9ce9563 --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h @@ -0,0 +1,137 @@ +void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(x:y:)"))); + +void unversionedRenameDUMP(void) __attribute__((swift_name("unversionedRename_HEADER()"))); + +void acceptClosure(void (^ __attribute__((noescape)) block)(void)); + +void privateFunc(void) __attribute__((swift_private)); + +typedef double MyDoubleWrapper __attribute__((swift_wrapper(struct))); + +#if __OBJC__ +@class NSString; + +extern NSString 
*MyErrorDomain; + +enum __attribute__((ns_error_domain(MyErrorDomain))) MyErrorCode { + MyErrorCodeFailed = 1 +}; + +__attribute__((swift_bridge("MyValueType"))) +@interface MyReferenceType +@end + +@interface TestProperties +@property (nonatomic, readwrite, retain) id accessorsOnly; +@property (nonatomic, readwrite, retain, class) id accessorsOnlyForClass; + +@property (nonatomic, readwrite, retain) id accessorsOnlyInVersion3; +@property (nonatomic, readwrite, retain, class) id accessorsOnlyForClassInVersion3; + +@property (nonatomic, readwrite, retain) id accessorsOnlyExceptInVersion3; +@property (nonatomic, readwrite, retain, class) id accessorsOnlyForClassExceptInVersion3; +@end + +@interface Base +@end + +@interface TestGenericDUMP<Element> : Base +- (Element)element; +@end + +@interface Swift3RenamedOnlyDUMP +@end + +__attribute__((swift_name("Swift4Name"))) +@interface Swift3RenamedAlsoDUMP +@end + +@interface Swift4RenamedDUMP +@end + +#endif + + +enum __attribute__((flag_enum)) FlagEnum { + FlagEnumA = 1, + FlagEnumB = 2 +}; + +enum __attribute__((flag_enum)) NewlyFlagEnum { + NewlyFlagEnumA = 1, + NewlyFlagEnumB = 2 +}; + +enum APINotedFlagEnum { + APINotedFlagEnumA = 1, + APINotedFlagEnumB = 2 +}; + + +enum __attribute__((enum_extensibility(open))) OpenEnum { + OpenEnumA = 1, +}; + +enum __attribute__((enum_extensibility(open))) NewlyOpenEnum { + NewlyOpenEnumA = 1, +}; + +enum __attribute__((enum_extensibility(closed))) NewlyClosedEnum { + NewlyClosedEnumA = 1, +}; + +enum __attribute__((enum_extensibility(open))) ClosedToOpenEnum { + ClosedToOpenEnumA = 1, +}; + +enum __attribute__((enum_extensibility(closed))) OpenToClosedEnum { + OpenToClosedEnumA = 1, +}; + +enum APINotedOpenEnum { + APINotedOpenEnumA = 1, +}; + +enum APINotedClosedEnum { + APINotedClosedEnumA = 1, +}; + + +enum SoonToBeCFEnum { + SoonToBeCFEnumA = 1 +}; +enum SoonToBeNSEnum { + SoonToBeNSEnumA = 1 +}; +enum SoonToBeCFOptions { + SoonToBeCFOptionsA = 1 +}; +enum SoonToBeNSOptions { 
+ SoonToBeNSOptionsA = 1 +}; +enum SoonToBeCFClosedEnum { + SoonToBeCFClosedEnumA = 1 +}; +enum SoonToBeNSClosedEnum { + SoonToBeNSClosedEnumA = 1 +}; +enum UndoAllThatHasBeenDoneToMe { + UndoAllThatHasBeenDoneToMeA = 1 +} __attribute__((flag_enum)) __attribute__((enum_extensibility(closed))); + + +typedef int MultiVersionedTypedef4; +typedef int MultiVersionedTypedef4Notes; +typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_NEW"))); + +typedef int MultiVersionedTypedef34; +typedef int MultiVersionedTypedef34Notes; +typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_NEW"))); + +typedef int MultiVersionedTypedef45; +typedef int MultiVersionedTypedef45Notes; +typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_NEW"))); + +typedef int MultiVersionedTypedef345; +typedef int MultiVersionedTypedef345Notes; +typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_NEW"))); diff --git a/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap new file mode 100644 index 0000000..6d957fd --- /dev/null +++ b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap @@ -0,0 +1,5 @@ +framework module VersionedKit { + umbrella header "VersionedKit.h" + export * + module * { export * } +} diff --git a/clang/test/APINotes/Inputs/Headers/APINotes.apinotes b/clang/test/APINotes/Inputs/Headers/APINotes.apinotes new file mode 100644 index 0000000..08210fc --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/APINotes.apinotes @@ -0,0 +1,18 @@ +Name: HeaderLib +SwiftInferImportAsMember: true +Functions: + - Name: custom_realloc + NullabilityOfRet: N + Nullability: [ N, S ] + - Name: unavailable_function + Availability: none + AvailabilityMsg: "I beg you not to 
use this" + - Name: do_something_with_pointers + NullabilityOfRet: O + Nullability: [ N, O ] + +Globals: + - Name: global_int + Nullability: N + - Name: unavailable_global_int + Availability: none diff --git a/clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes b/clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes new file mode 100644 index 0000000..00f7b50 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes @@ -0,0 +1,10 @@ +Name: BrokenTypes +Functions: + - Name: break_me_function + ResultType: 'int * with extra junk' + Parameters: + - Position: 0 + Type: 'not_a_type' +Globals: + - Name: break_me_variable + Type: 'double' diff --git a/clang/test/APINotes/Inputs/Headers/BrokenTypes.h b/clang/test/APINotes/Inputs/Headers/BrokenTypes.h new file mode 100644 index 0000000..fee054b --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/BrokenTypes.h @@ -0,0 +1,8 @@ +#ifndef BROKEN_TYPES_H +#define BROKEN_TYPES_H + +char break_me_function(void *ptr); + +extern char break_me_variable; + +#endif // BROKEN_TYPES_H diff --git a/clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes b/clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes new file mode 100644 index 0000000..0f47ac6 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes @@ -0,0 +1,15 @@ +Name: ExternCtx +Globals: + - Name: globalInExternC + Availability: none + AvailabilityMsg: "oh no" + - Name: globalInExternCXX + Availability: none + AvailabilityMsg: "oh no #2" +Functions: + - Name: globalFuncInExternC + Availability: none + AvailabilityMsg: "oh no #3" + - Name: globalFuncInExternCXX + Availability: none + AvailabilityMsg: "oh no #4" diff --git a/clang/test/APINotes/Inputs/Headers/ExternCtx.h b/clang/test/APINotes/Inputs/Headers/ExternCtx.h new file mode 100644 index 0000000..669d443 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/ExternCtx.h @@ -0,0 +1,11 @@ +extern "C" { + static int globalInExternC = 1; + + static void globalFuncInExternC() {} +} + 
+extern "C++" { + static int globalInExternCXX = 2; + + static void globalFuncInExternCXX() {} +} diff --git a/clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes b/clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes new file mode 100644 index 0000000..7dcb224 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes @@ -0,0 +1,37 @@ +Name: HeaderLib +SwiftInferImportAsMember: true +Functions: + - Name: custom_realloc + NullabilityOfRet: N + Nullability: [ N, S ] + - Name: unavailable_function + Availability: none + AvailabilityMsg: "I beg you not to use this" + - Name: do_something_with_pointers + NullabilityOfRet: O + Nullability: [ N, O ] + - Name: do_something_with_arrays + Parameters: + - Position: 0 + Nullability: N + - Position: 1 + Nullability: N + - Name: take_pointer_and_int + Parameters: + - Position: 0 + Nullability: N + NoEscape: true + - Position: 1 + NoEscape: true +Globals: + - Name: global_int + Nullability: N + - Name: unavailable_global_int + Availability: none +Tags: + - Name: unavailable_struct + Availability: none + +Typedefs: + - Name: unavailable_typedef + Availability: none diff --git a/clang/test/APINotes/Inputs/Headers/HeaderLib.h b/clang/test/APINotes/Inputs/Headers/HeaderLib.h new file mode 100644 index 0000000..8065249 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/HeaderLib.h @@ -0,0 +1,19 @@ +#ifndef HEADER_LIB_H +#define HEADER_LIB_H + +void *custom_realloc(void *member, unsigned size); + +int *global_int; + +int unavailable_function(void); +int unavailable_global_int; + +void do_something_with_pointers(int *ptr1, int *ptr2); +void do_something_with_arrays(int simple[], int nested[][2]); + +typedef int unavailable_typedef; +struct unavailable_struct { int x, y, z; }; + +void take_pointer_and_int(int *ptr1, int value); + +#endif diff --git a/clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes new file mode 100644 index 
0000000..813eb50 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes @@ -0,0 +1,10 @@ +Name: InstancetypeModule +Classes: +- Name: SomeBaseClass + Methods: + - Selector: instancetypeFactoryMethod + MethodKind: Class + ResultType: SomeBaseClass * _Nonnull + - Selector: staticFactoryMethod + MethodKind: Class + ResultType: SomeBaseClass * _Nonnull diff --git a/clang/test/APINotes/Inputs/Headers/InstancetypeModule.h b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.h new file mode 100644 index 0000000..767f201 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.h @@ -0,0 +1,10 @@ +@interface Object +@end + +@interface SomeBaseClass : Object ++ (nullable instancetype)instancetypeFactoryMethod; ++ (nullable SomeBaseClass *)staticFactoryMethod; +@end + +@interface SomeSubclass : SomeBaseClass +@end diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h new file mode 100644 index 0000000..867a15c --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h @@ -0,0 +1 @@ +extern int ModuleWithWrongCase; diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h new file mode 100644 index 0000000..aa01429 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h @@ -0,0 +1 @@ +extern int ModuleWithWrongCasePrivate; diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes new file mode 100644 index 0000000..dc6dc50 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes @@ -0,0 +1 @@ +Name: ModuleWithWrongCasePrivate diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes 
b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes new file mode 100644 index 0000000..dc6dc50 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes @@ -0,0 +1 @@ +Name: ModuleWithWrongCasePrivate diff --git a/clang/test/APINotes/Inputs/Headers/Namespaces.apinotes b/clang/test/APINotes/Inputs/Headers/Namespaces.apinotes new file mode 100644 index 0000000..e9da367 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/Namespaces.apinotes @@ -0,0 +1,53 @@ +--- +Name: Namespaces +Globals: + - Name: varInInlineNamespace + SwiftName: swiftVarInInlineNamespace +Functions: + - Name: funcInNamespace + SwiftName: inWrongContext() + - Name: funcInInlineNamespace + SwiftName: swiftFuncInInlineNamespace() +Tags: + - Name: char_box + SwiftName: InWrongContext +Namespaces: + - Name: Namespace1 + Typedefs: + - Name: my_typedef + SwiftName: SwiftTypedef + - Name: my_using_decl + SwiftName: SwiftUsingDecl + Globals: + - Name: varInNamespace + SwiftName: swiftVarInNamespace + Functions: + - Name: funcInNamespace + SwiftName: swiftFuncInNamespace() + Tags: + - Name: char_box + SwiftName: CharBox + Namespaces: + - Name: Nested1 + Globals: + - Name: varInNestedNamespace + SwiftName: swiftVarInNestedNamespace + Functions: + - Name: funcInNestedNamespace + SwiftName: swiftFuncInNestedNamespace(_:) + Tags: + - Name: char_box + SwiftName: NestedCharBox + Namespaces: + - Name: Namespace1 + Tags: + - Name: char_box + SwiftName: DeepNestedCharBox + - Name: Nested2 + Globals: + - Name: varInNestedNamespace + SwiftName: swiftAnotherVarInNestedNamespace + - Name: InlineNamespace1 + Functions: + - Name: funcInInlineNamespace + SwiftName: shouldNotSpellOutInlineNamespaces() diff --git a/clang/test/APINotes/Inputs/Headers/Namespaces.h b/clang/test/APINotes/Inputs/Headers/Namespaces.h new file mode 100644 index 0000000..6a79e99 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/Namespaces.h @@ -0,0 +1,39 @@ +namespace Namespace1 { 
namespace Nested1 {} } + +namespace Namespace1 { +static int varInNamespace = 1; +struct char_box { char c; }; +void funcInNamespace(); + +namespace Nested1 { +void funcInNestedNamespace(int i); +struct char_box { + char c; +}; +} + +namespace Nested1 { +static int varInNestedNamespace = 1; +void funcInNestedNamespace(int i); + +namespace Namespace1 { +struct char_box { char c; }; +} // namespace Namespace1 +} // namespace Nested1 + +namespace Nested2 { +static int varInNestedNamespace = 2; +} // namespace Nested2 + +namespace Nested1 { namespace Namespace1 {} } +} // namespace Namespace1 + +namespace Namespace1 { +typedef int my_typedef; +using my_using_decl = int; +} + +inline namespace InlineNamespace1 { +static int varInInlineNamespace = 3; +void funcInInlineNamespace(); +} diff --git a/clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes b/clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes new file mode 100644 index 0000000..5f62284 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes @@ -0,0 +1,4 @@ +Name: HeaderLib +Globals: +- Name: PrivateLib + Type: float diff --git a/clang/test/APINotes/Inputs/Headers/PrivateLib.h b/clang/test/APINotes/Inputs/Headers/PrivateLib.h new file mode 100644 index 0000000..59aeef0 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/PrivateLib.h @@ -0,0 +1 @@ +extern int PrivateLib; diff --git a/clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes b/clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes new file mode 100644 index 0000000..908dae0 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes @@ -0,0 +1 @@ +garbage here because this file shouldn't get read diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes new file mode 100644 index 0000000..5dbb83c --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes @@ -0,0 +1,9 @@ +--- +Name: SwiftImportAs 
+Tags: +- Name: ImmortalRefType + SwiftImportAs: reference +- Name: RefCountedType + SwiftImportAs: reference + SwiftReleaseOp: RCRelease + SwiftRetainOp: RCRetain diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h new file mode 100644 index 0000000..82b8a67 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h @@ -0,0 +1,6 @@ +struct ImmortalRefType {}; + +struct RefCountedType { int value; }; + +inline void RCRetain(RefCountedType *x) { x->value++; } +inline void RCRelease(RefCountedType *x) { x->value--; } diff --git a/clang/test/APINotes/Inputs/Headers/module.modulemap b/clang/test/APINotes/Inputs/Headers/module.modulemap new file mode 100644 index 0000000..98b4ee3 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/module.modulemap @@ -0,0 +1,31 @@ +module ExternCtx { + header "ExternCtx.h" +} + +module HeaderLib { + header "HeaderLib.h" +} + +module InstancetypeModule { + header "InstancetypeModule.h" +} + +module BrokenTypes { + header "BrokenTypes.h" +} + +module ModuleWithWrongCase { + header "ModuleWithWrongCase.h" +} + +module ModuleWithWrongCasePrivate { + header "ModuleWithWrongCasePrivate.h" +} + +module Namespaces { + header "Namespaces.h" +} + +module SwiftImportAs { + header "SwiftImportAs.h" +} diff --git a/clang/test/APINotes/Inputs/Headers/module.private.modulemap b/clang/test/APINotes/Inputs/Headers/module.private.modulemap new file mode 100644 index 0000000..2ecf322 --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/module.private.modulemap @@ -0,0 +1,5 @@ +module PrivateLib { + header "PrivateLib.h" +} + +module ModuleWithWrongCasePrivate.Inner {} diff --git a/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes new file mode 100644 index 0000000..77db844 --- /dev/null +++ b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes @@ -0,0 +1,65 @@ +--- +Name: UIKit +Classes: + - 
Name: UIFont + Methods: + - Selector: 'fontWithName:size:' + MethodKind: Instance + Nullability: [ N ] + NullabilityOfRet: O + DesignatedInit: true +# CHECK: duplicate definition of method '-[UIFont fontWithName:size:]' + - Selector: 'fontWithName:size:' + MethodKind: Instance + Nullability: [ N ] + NullabilityOfRet: O + DesignatedInit: true + Properties: + - Name: familyName + Nullability: N + - Name: fontName + Nullability: N +# CHECK: duplicate definition of instance property 'UIFont.familyName' + - Name: familyName + Nullability: N +# CHECK: multiple definitions of class 'UIFont' + - Name: UIFont +Protocols: + - Name: MyProto + AuditedForNullability: true +# CHECK: multiple definitions of protocol 'MyProto' + - Name: MyProto + AuditedForNullability: true +Functions: + - Name: 'globalFoo' + Nullability: [ N, N, O, S ] + NullabilityOfRet: O + - Name: 'globalFoo2' + Nullability: [ N, N, O, S ] + NullabilityOfRet: O +Globals: + - Name: globalVar + Nullability: O + - Name: globalVar2 + Nullability: O +Tags: +# CHECK: cannot mix EnumKind and FlagEnum (for FlagAndEnumKind) + - Name: FlagAndEnumKind + FlagEnum: true + EnumKind: CFOptions +# CHECK: cannot mix EnumKind and FlagEnum (for FlagAndEnumKind2) + - Name: FlagAndEnumKind2 + EnumKind: CFOptions + FlagEnum: false +# CHECK: cannot mix EnumKind and EnumExtensibility (for ExtensibilityAndEnumKind) + - Name: ExtensibilityAndEnumKind + EnumExtensibility: open + EnumKind: CFOptions +# CHECK: cannot mix EnumKind and EnumExtensibility (for ExtensibilityAndEnumKind2) + - Name: ExtensibilityAndEnumKind2 + EnumKind: CFOptions + EnumExtensibility: closed +# CHECK: cannot mix EnumKind and EnumExtensibility (for ExtensibilityAndEnumKind3) + - Name: ExtensibilityAndEnumKind3 + EnumKind: none + EnumExtensibility: none diff --git a/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h new file mode 100644 index 0000000..55313ae --- /dev/null +++ 
b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h @@ -0,0 +1 @@ +extern int yesOfCourseThisIsWhatUIKitLooksLike; diff --git a/clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap b/clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap new file mode 100644 index 0000000..3d683d7 --- /dev/null +++ b/clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap @@ -0,0 +1,3 @@ +module UIKit { + header "UIKit.h" +} diff --git a/clang/test/APINotes/availability.m b/clang/test/APINotes/availability.m new file mode 100644 index 0000000..2ddc2a7 --- /dev/null +++ b/clang/test/APINotes/availability.m @@ -0,0 +1,48 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -fmodules -Wno-private-module -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify + +#include "HeaderLib.h" +#import <SomeKit/SomeKit.h> +#import <SomeKit/SomeKit_Private.h> + +int main() { + int i; + i = unavailable_function(); // expected-error{{'unavailable_function' is unavailable: I beg you not to use this}} + // expected-note@HeaderLib.h:8{{'unavailable_function' has been explicitly marked unavailable here}} + i = unavailable_global_int; // expected-error{{'unavailable_global_int' is unavailable}} + // expected-note@HeaderLib.h:9{{'unavailable_global_int' has been explicitly marked unavailable here}} + + unavailable_typedef t; // expected-error{{'unavailable_typedef' is unavailable}} + // expected-note@HeaderLib.h:14{{'unavailable_typedef' has been explicitly marked unavailable here}} + + struct unavailable_struct s; // expected-error{{'unavailable_struct' is unavailable}} + // expected-note@HeaderLib.h:15{{'unavailable_struct' has been explicitly marked unavailable here}} + + B *b = 0; // expected-error{{'B' is unavailable: just don't}} + // expected-note@SomeKit/SomeKit.h:15{{'B' has been explicitly marked unavailable here}} + + id<InternalProtocol> proto = 0; // 
expected-error{{'InternalProtocol' is unavailable: not for you}} + // expected-note@SomeKit/SomeKit_Private.h:12{{'InternalProtocol' has been explicitly marked unavailable here}} + + A *a = 0; + i = a.intValue; // expected-error{{intValue' is unavailable: wouldn't work anyway}} + // expected-note@SomeKit/SomeKit.h:12{{'intValue' has been explicitly marked unavailable here}} + + [a transform:a]; // expected-error{{'transform:' is unavailable: anything but this}} + // expected-note@SomeKit/SomeKit.h:6{{'transform:' has been explicitly marked unavailable here}} + + [a implicitGetOnlyInstance]; // expected-error{{'implicitGetOnlyInstance' is unavailable: getter gone}} + // expected-note@SomeKit/SomeKit.h:53{{'implicitGetOnlyInstance' has been explicitly marked unavailable here}} + [A implicitGetOnlyClass]; // expected-error{{'implicitGetOnlyClass' is unavailable: getter gone}} + // expected-note@SomeKit/SomeKit.h:54{{'implicitGetOnlyClass' has been explicitly marked unavailable here}} + [a implicitGetSetInstance]; // expected-error{{'implicitGetSetInstance' is unavailable: getter gone}} + // expected-note@SomeKit/SomeKit.h:56{{'implicitGetSetInstance' has been explicitly marked unavailable here}} + [a setImplicitGetSetInstance: a]; // expected-error{{'setImplicitGetSetInstance:' is unavailable: setter gone}} + // expected-note@SomeKit/SomeKit.h:56{{'setImplicitGetSetInstance:' has been explicitly marked unavailable here}} + [A implicitGetSetClass]; // expected-error{{'implicitGetSetClass' is unavailable: getter gone}} + // expected-note@SomeKit/SomeKit.h:57{{'implicitGetSetClass' has been explicitly marked unavailable here}} + [A setImplicitGetSetClass: a]; // expected-error{{'setImplicitGetSetClass:' is unavailable: setter gone}} + // expected-note@SomeKit/SomeKit.h:57{{'setImplicitGetSetClass:' has been explicitly marked unavailable here}} + return 0; +} + diff --git a/clang/test/APINotes/broken_types.m b/clang/test/APINotes/broken_types.m new file mode 100644 index 
0000000..ee33ff7 --- /dev/null +++ b/clang/test/APINotes/broken_types.m @@ -0,0 +1,19 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s 2> %t.err +// RUN: FileCheck %s < %t.err + +#include "BrokenTypes.h" + +// CHECK: <API Notes>:1:1: error: unknown type name 'not_a_type' +// CHECK-NEXT: not_a_type +// CHECK-NEXT: ^ + +// CHECK: <API Notes>:1:7: error: unparsed tokens following type +// CHECK-NEXT: int * with extra junk +// CHECK-NEXT: ^ + +// CHECK: BrokenTypes.h:4:6: error: API notes replacement type 'int *' has a different size from original type 'char' + +// CHECK: BrokenTypes.h:6:13: error: API notes replacement type 'double' has a different size from original type 'char' + +// CHECK: 5 errors generated. diff --git a/clang/test/APINotes/case-for-private-apinotes-file.c b/clang/test/APINotes/case-for-private-apinotes-file.c new file mode 100644 index 0000000..6aff3db --- /dev/null +++ b/clang/test/APINotes/case-for-private-apinotes-file.c @@ -0,0 +1,22 @@ +// REQUIRES: case-insensitive-filesystem + +// RUN: rm -rf %t +// RUN: %clang_cc1 -fsyntax-only -fmodules -fapinotes-modules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/Frameworks -I %S/Inputs/Headers %s 2>&1 | FileCheck %s + +// RUN: rm -rf %t +// RUN: %clang_cc1 -fsyntax-only -fmodules -fapinotes-modules -fimplicit-module-maps -fmodules-cache-path=%t -iframework %S/Inputs/Frameworks -isystem %S/Inputs/Headers %s -Werror + +// RUN: rm -rf %t +// RUN: %clang_cc1 -fsyntax-only -fmodules -fapinotes-modules -fimplicit-module-maps -fmodules-cache-path=%t -iframework %S/Inputs/Frameworks -isystem %S/Inputs/Headers %s -Wnonportable-private-system-apinotes-path 2>&1 | FileCheck %s + +#include <ModuleWithWrongCase.h> +#include <ModuleWithWrongCasePrivate.h> +#include <FrameworkWithWrongCase/FrameworkWithWrongCase.h> +#include 
<FrameworkWithWrongCasePrivate/FrameworkWithWrongCasePrivate.h> +#include <FrameworkWithActualPrivateModule/FrameworkWithActualPrivateModule_Private.h> + +// CHECK-NOT: warning: +// CHECK: warning: private API notes file for module 'ModuleWithWrongCasePrivate' should be named 'ModuleWithWrongCasePrivate_private.apinotes', not 'ModuleWithWrongCasePrivate_Private.apinotes' +// CHECK-NOT: warning: +// CHECK: warning: private API notes file for module 'FrameworkWithWrongCasePrivate' should be named 'FrameworkWithWrongCasePrivate_private.apinotes', not 'FrameworkWithWrongCasePrivate_Private.apinotes' +// CHECK-NOT: warning: diff --git a/clang/test/APINotes/extern-context.cpp b/clang/test/APINotes/extern-context.cpp new file mode 100644 index 0000000..331dee0 --- /dev/null +++ b/clang/test/APINotes/extern-context.cpp @@ -0,0 +1,23 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalInExternC -x c++ | FileCheck -check-prefix=CHECK-EXTERN-C %s +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalInExternCXX -x c++ | FileCheck -check-prefix=CHECK-EXTERN-CXX %s +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalFuncInExternC -x c++ | FileCheck -check-prefix=CHECK-FUNC-EXTERN-C %s +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalFuncInExternCXX -x c++ | FileCheck -check-prefix=CHECK-FUNC-EXTERN-CXX %s + +#include "ExternCtx.h" + +// CHECK-EXTERN-C: 
Dumping globalInExternC: +// CHECK-EXTERN-C: VarDecl {{.+}} imported in ExternCtx globalInExternC 'int' +// CHECK-EXTERN-C: UnavailableAttr {{.+}} <<invalid sloc>> "oh no" + +// CHECK-EXTERN-CXX: Dumping globalInExternCXX: +// CHECK-EXTERN-CXX: VarDecl {{.+}} imported in ExternCtx globalInExternCXX 'int' +// CHECK-EXTERN-CXX: UnavailableAttr {{.+}} <<invalid sloc>> "oh no #2" + +// CHECK-FUNC-EXTERN-C: Dumping globalFuncInExternC: +// CHECK-FUNC-EXTERN-C: FunctionDecl {{.+}} imported in ExternCtx globalFuncInExternC 'void ()' +// CHECK-FUNC-EXTERN-C: UnavailableAttr {{.+}} <<invalid sloc>> "oh no #3" + +// CHECK-FUNC-EXTERN-CXX: Dumping globalFuncInExternCXX: +// CHECK-FUNC-EXTERN-CXX: FunctionDecl {{.+}} imported in ExternCtx globalFuncInExternCXX 'void ()' +// CHECK-FUNC-EXTERN-CXX: UnavailableAttr {{.+}} <<invalid sloc>> "oh no #4" diff --git a/clang/test/APINotes/instancetype.m b/clang/test/APINotes/instancetype.m new file mode 100644 index 0000000..30339e5 --- /dev/null +++ b/clang/test/APINotes/instancetype.m @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -verify %s + +@import InstancetypeModule; + +void test() { + // The nullability is here to verify that the API notes were applied. 
+ int good = [SomeSubclass instancetypeFactoryMethod]; // expected-error {{initializing 'int' with an expression of type 'SomeSubclass * _Nonnull'}} + int bad = [SomeSubclass staticFactoryMethod]; // expected-error {{initializing 'int' with an expression of type 'SomeBaseClass * _Nonnull'}} +} diff --git a/clang/test/APINotes/module-cache.m b/clang/test/APINotes/module-cache.m new file mode 100644 index 0000000..5dcaf11 --- /dev/null +++ b/clang/test/APINotes/module-cache.m @@ -0,0 +1,65 @@ +// RUN: rm -rf %t + +// Set up directories +// RUN: mkdir -p %t/APINotes +// RUN: cp %S/Inputs/APINotes/SomeOtherKit.apinotes %t/APINotes/SomeOtherKit.apinotes +// RUN: mkdir -p %t/Frameworks +// RUN: cp -r %S/Inputs/Frameworks/SomeOtherKit.framework %t/Frameworks + +// First build: check that 'methodB' is unavailable but 'methodA' is available. +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/before.log 2>&1 +// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/before.log +// RUN: FileCheck -check-prefix=CHECK-REBUILD %s < %t/before.log +// RUN: FileCheck -check-prefix=CHECK-ONE-ERROR %s < %t/before.log + +// Do it again; now we're using caches. +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/before.log 2>&1 +// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/before.log +// RUN: FileCheck -check-prefix=CHECK-WITHOUT-REBUILD %s < %t/before.log +// RUN: FileCheck -check-prefix=CHECK-ONE-ERROR %s < %t/before.log + +// Add a blank line to the header to force the module to rebuild, without +// (yet) changing API notes. 
+// RUN: echo >> %t/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/before.log 2>&1 +// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/before.log +// RUN: FileCheck -check-prefix=CHECK-REBUILD %s < %t/before.log +// RUN: FileCheck -check-prefix=CHECK-ONE-ERROR %s < %t/before.log + +// Change the API notes file, after the module has rebuilt once. +// RUN: echo ' - Selector: "methodA"' >> %t/APINotes/SomeOtherKit.apinotes +// RUN: echo ' MethodKind: Instance' >> %t/APINotes/SomeOtherKit.apinotes +// RUN: echo ' Availability: none' >> %t/APINotes/SomeOtherKit.apinotes +// RUN: echo ' AvailabilityMsg: "not here either"' >> %t/APINotes/SomeOtherKit.apinotes + +// Build again: check that both methods are now unavailable and that the module rebuilt. +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/after.log 2>&1 +// RUN: FileCheck -check-prefix=CHECK-METHODA %s < %t/after.log +// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/after.log +// RUN: FileCheck -check-prefix=CHECK-REBUILD %s < %t/after.log +// RUN: FileCheck -check-prefix=CHECK-TWO-ERRORS %s < %t/after.log + +// Run the build again: check that both methods are now unavailable +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/after.log 2>&1 +// RUN: FileCheck -check-prefix=CHECK-METHODA %s < %t/after.log +// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/after.log +// RUN: FileCheck -check-prefix=CHECK-WITHOUT-REBUILD %s < %t/after.log +// RUN: FileCheck -check-prefix=CHECK-TWO-ERRORS %s < %t/after.log + +@import SomeOtherKit; + +void test(A *a) { + // CHECK-METHODA: error: 'methodA' is unavailable: not 
here either + [a methodA]; + + // CHECK-METHODB: error: 'methodB' is unavailable: anything but this + [a methodB]; +} + +// CHECK-REBUILD: remark: building module{{.*}}SomeOtherKit + +// CHECK-WITHOUT-REBUILD-NOT: remark: building module{{.*}}SomeOtherKit + +// CHECK-ONE-ERROR: 1 error generated. +// CHECK-TWO-ERRORS: 2 errors generated. + diff --git a/clang/test/APINotes/namespaces.cpp b/clang/test/APINotes/namespaces.cpp new file mode 100644 index 0000000..2f9d93c --- /dev/null +++ b/clang/test/APINotes/namespaces.cpp @@ -0,0 +1,69 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -x objective-c++ +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::my_typedef -x objective-c++ | FileCheck -check-prefix=CHECK-TYPEDEF-IN-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::my_using_decl -x objective-c++ | FileCheck -check-prefix=CHECK-USING-DECL-IN-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::varInNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-GLOBAL-IN-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I 
%S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::funcInNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-FUNC-IN-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::char_box -x objective-c++ | FileCheck -check-prefix=CHECK-STRUCT-IN-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::varInNestedNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-GLOBAL-IN-NESTED-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested2::varInNestedNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::char_box -x objective-c++ | FileCheck -check-prefix=CHECK-STRUCT-IN-NESTED-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::funcInNestedNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-FUNC-IN-NESTED-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks 
-fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::Namespace1::char_box -x objective-c++ | FileCheck -check-prefix=CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter varInInlineNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-GLOBAL-IN-INLINE-NAMESPACE %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter funcInInlineNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-FUNC-IN-INLINE-NAMESPACE %s + +#import <Namespaces.h> + +// CHECK-TYPEDEF-IN-NAMESPACE: Dumping Namespace1::my_typedef: +// CHECK-TYPEDEF-IN-NAMESPACE-NEXT: TypedefDecl {{.+}} imported in Namespaces my_typedef 'int' +// CHECK-TYPEDEF-IN-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "SwiftTypedef" + +// CHECK-USING-DECL-IN-NAMESPACE: Dumping Namespace1::my_using_decl: +// CHECK-USING-DECL-IN-NAMESPACE-NEXT: TypeAliasDecl {{.+}} imported in Namespaces my_using_decl 'int' +// CHECK-USING-DECL-IN-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "SwiftUsingDecl" + +// CHECK-GLOBAL-IN-NAMESPACE: Dumping Namespace1::varInNamespace: +// CHECK-GLOBAL-IN-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInNamespace 'int' static cinit +// CHECK-GLOBAL-IN-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 1 +// CHECK-GLOBAL-IN-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftVarInNamespace" + +// CHECK-FUNC-IN-NAMESPACE: Dumping Namespace1::funcInNamespace: +// CHECK-FUNC-IN-NAMESPACE-NEXT: FunctionDecl 
{{.+}} imported in Namespaces funcInNamespace 'void ()' +// CHECK-FUNC-IN-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftFuncInNamespace()" + +// CHECK-STRUCT-IN-NAMESPACE: Dumping Namespace1::char_box: +// CHECK-STRUCT-IN-NAMESPACE-NEXT: CXXRecordDecl {{.+}} imported in Namespaces <undeserialized declarations> struct char_box +// CHECK-STRUCT-IN-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "CharBox" + +// CHECK-GLOBAL-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested1::varInNestedNamespace: +// CHECK-GLOBAL-IN-NESTED-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInNestedNamespace 'int' static cinit +// CHECK-GLOBAL-IN-NESTED-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 1 +// CHECK-GLOBAL-IN-NESTED-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftVarInNestedNamespace" + +// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested2::varInNestedNamespace: +// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInNestedNamespace 'int' static cinit +// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 2 +// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftAnotherVarInNestedNamespace" + +// CHECK-FUNC-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested1::funcInNestedNamespace: +// CHECK-FUNC-IN-NESTED-NAMESPACE-NEXT: FunctionDecl {{.+}} imported in Namespaces funcInNestedNamespace 'void (int)' +// CHECK-FUNC-IN-NESTED-NAMESPACE-NEXT: ParmVarDecl {{.+}} i 'int' +// CHECK-FUNC-IN-NESTED-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftFuncInNestedNamespace(_:)" + +// CHECK-STRUCT-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested1::char_box: +// CHECK-STRUCT-IN-NESTED-NAMESPACE-NEXT: CXXRecordDecl {{.+}} imported in Namespaces <undeserialized declarations> struct char_box +// CHECK-STRUCT-IN-NESTED-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "NestedCharBox" + +// CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE: Dumping 
Namespace1::Nested1::Namespace1::char_box: +// CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE-NEXT: CXXRecordDecl {{.+}} imported in Namespaces <undeserialized declarations> struct char_box +// CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "DeepNestedCharBox" + +// CHECK-GLOBAL-IN-INLINE-NAMESPACE: Dumping varInInlineNamespace: +// CHECK-GLOBAL-IN-INLINE-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInInlineNamespace 'int' static cinit +// CHECK-GLOBAL-IN-INLINE-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 3 +// CHECK-GLOBAL-IN-INLINE-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftVarInInlineNamespace" + +// CHECK-FUNC-IN-INLINE-NAMESPACE: Dumping funcInInlineNamespace: +// CHECK-FUNC-IN-INLINE-NAMESPACE-NEXT: FunctionDecl {{.+}} imported in Namespaces funcInInlineNamespace 'void ()' +// CHECK-FUNC-IN-INLINE-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftFuncInInlineNamespace()" diff --git a/clang/test/APINotes/nullability.c b/clang/test/APINotes/nullability.c new file mode 100644 index 0000000..e07fc2e --- /dev/null +++ b/clang/test/APINotes/nullability.c @@ -0,0 +1,21 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify + +#include "HeaderLib.h" + +int main() { + custom_realloc(0, 0); // expected-warning{{null passed to a callee that requires a non-null argument}} + int i = 0; + do_something_with_pointers(&i, 0); + do_something_with_pointers(0, &i); // expected-warning{{null passed to a callee that requires a non-null argument}} + + extern void *p; + do_something_with_arrays(0, p); // expected-warning{{null passed to a callee that requires a non-null argument}} + do_something_with_arrays(p, 0); // expected-warning{{null passed to a callee that requires a non-null argument}} + + take_pointer_and_int(0, 0); // expected-warning{{null passed to a 
callee that requires a non-null argument}} + + float *fp = global_int; // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'int * _Nonnull'}} + return 0; +} + diff --git a/clang/test/APINotes/nullability.m b/clang/test/APINotes/nullability.m new file mode 100644 index 0000000..21ec668 --- /dev/null +++ b/clang/test/APINotes/nullability.m @@ -0,0 +1,46 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify + +// Test with Swift version 3.0. This should only affect the few APIs that have an entry in the 3.0 tables. + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fapinotes-swift-version=3.0 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify -DSWIFT_VERSION_3_0 -fmodules-ignore-macro=SWIFT_VERSION_3_0 + +#import <SomeKit/SomeKit.h> + +int main() { + A *a; + +#if SWIFT_VERSION_3_0 + float *fp = // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'A * _Nullable'}} + [a transform: 0 integer: 0]; +#else + float *fp = // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'A *'}} + [a transform: 0 integer: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} +#endif + + [a setNonnullAInstance: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} + [A setNonnullAInstance: 0]; // no warning + a.nonnullAInstance = 0; // expected-warning{{null passed to a callee that requires a non-null argument}} + A* _Nonnull aPtr = a.nonnullAInstance; // no warning + + [a setNonnullAClass: 0]; // no warning + [A setNonnullAClass: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} + + 
[a setNonnullABoth: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} + [A setNonnullABoth: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} + + [a setInternalProperty: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} + +#if SWIFT_VERSION_3_0 + // Version 3 information overrides header information. + [a setExplicitNonnullInstance: 0]; // okay + [a setExplicitNullableInstance: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} +#else + // Header information overrides unversioned information. + [a setExplicitNonnullInstance: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}} + [a setExplicitNullableInstance: 0]; // okay +#endif + + return 0; +} + diff --git a/clang/test/APINotes/objc-forward-declarations.m b/clang/test/APINotes/objc-forward-declarations.m new file mode 100644 index 0000000..e82bed2 --- /dev/null +++ b/clang/test/APINotes/objc-forward-declarations.m @@ -0,0 +1,12 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -F %S/Inputs/Frameworks %s -verify + +@import LayeredKit; + +void test( + UpwardClass *okayClass, + id <UpwardProto> okayProto, + PerfectlyNormalClass *badClass // expected-error {{'PerfectlyNormalClass' is unavailable}} +) { + // expected-note@LayeredKitImpl/LayeredKitImpl.h:4 {{'PerfectlyNormalClass' has been explicitly marked unavailable here}} +} diff --git a/clang/test/APINotes/objc_designated_inits.m b/clang/test/APINotes/objc_designated_inits.m new file mode 100644 index 0000000..1f2b8ed --- /dev/null +++ b/clang/test/APINotes/objc_designated_inits.m @@ -0,0 +1,17 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fsyntax-only -I 
%S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify + +#include "HeaderLib.h" +#import <SomeKit/SomeKit.h> + +@interface CSub : C +-(instancetype)initWithA:(A*)a; +@end + +@implementation CSub +-(instancetype)initWithA:(A*)a { // expected-warning{{designated initializer missing a 'super' call to a designated initializer of the super class}} + // expected-note@SomeKit/SomeKit.h:20 2{{method marked as designated initializer of the class here}} + self = [super init]; // expected-warning{{designated initializer invoked a non-designated initializer}} + return self; +} +@end diff --git a/clang/test/APINotes/properties.m b/clang/test/APINotes/properties.m new file mode 100644 index 0000000..f218092 --- /dev/null +++ b/clang/test/APINotes/properties.m @@ -0,0 +1,42 @@ +// RUN: rm -rf %t && mkdir -p %t + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fblocks -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'TestProperties::' | FileCheck -check-prefix=CHECK -check-prefix=CHECK-4 %s +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fblocks -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'TestProperties::' -fapinotes-swift-version=3 | FileCheck -check-prefix=CHECK -check-prefix=CHECK-3 %s + +@import VersionedKit; + +// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnly 'id' +// CHECK-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-NOT: Attr + +// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyForClass 'id' +// CHECK-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-NOT: Attr + +// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyInVersion3 'id' +// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-4-NEXT: SwiftVersionedAdditionAttr {{.+}} 3.0{{$}} +// CHECK-4-NEXT: 
SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-NOT: Attr + +// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyForClassInVersion3 'id' +// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-4-NEXT: SwiftVersionedAdditionAttr {{.+}} 3.0{{$}} +// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-NOT: Attr + +// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyExceptInVersion3 'id' +// CHECK-3-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}} +// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-4-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 3.0 {{[0-9]+}} +// CHECK-NOT: Attr + +// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyForClassExceptInVersion3 'id' +// CHECK-3-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}} +// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>> +// CHECK-4-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 3.0 {{[0-9]+}} +// CHECK-NOT: Attr + +// CHECK-LABEL: Decl diff --git a/clang/test/APINotes/retain-count-convention.m b/clang/test/APINotes/retain-count-convention.m new file mode 100644 index 0000000..4bf9610a --- /dev/null +++ b/clang/test/APINotes/retain-count-convention.m @@ -0,0 +1,38 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fdisable-module-hash -fsyntax-only -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/SimpleKit.pcm | FileCheck %s +// RUN: %clang_cc1 -ast-dump -ast-dump-filter 'DUMP' %t/ModulesCache/SimpleKit.pcm | FileCheck -check-prefix CHECK-DUMP %s + +#import <SimpleKit/SimpleKit.h> + +// CHECK: void *getCFOwnedToUnowned(void) 
__attribute__((cf_returns_not_retained)); +// CHECK: void *getCFUnownedToOwned(void) __attribute__((cf_returns_retained)); +// CHECK: void *getCFOwnedToNone(void) __attribute__((cf_unknown_transfer)); +// CHECK: id getObjCOwnedToUnowned(void) __attribute__((ns_returns_not_retained)); +// CHECK: id getObjCUnownedToOwned(void) __attribute__((ns_returns_retained)); +// CHECK: int indirectGetCFOwnedToUnowned(void * _Nullable *out __attribute__((cf_returns_not_retained))); +// CHECK: int indirectGetCFUnownedToOwned(void * _Nullable *out __attribute__((cf_returns_retained))); +// CHECK: int indirectGetCFOwnedToNone(void * _Nullable *out); +// CHECK: int indirectGetCFNoneToOwned(void **out __attribute__((cf_returns_not_retained))); + +// CHECK-LABEL: @interface MethodTest +// CHECK: - (id)getOwnedToUnowned __attribute__((ns_returns_not_retained)); +// CHECK: - (id)getUnownedToOwned __attribute__((ns_returns_retained)); +// CHECK: @end + +// CHECK-DUMP-LABEL: Dumping getCFAuditedToUnowned_DUMP: +// CHECK-DUMP-NEXT: FunctionDecl +// CHECK-DUMP-NEXT: CFReturnsNotRetainedAttr +// CHECK-DUMP-NEXT: CFAuditedTransferAttr +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping getCFAuditedToOwned_DUMP: +// CHECK-DUMP-NEXT: FunctionDecl +// CHECK-DUMP-NEXT: CFReturnsRetainedAttr +// CHECK-DUMP-NEXT: CFAuditedTransferAttr +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping getCFAuditedToNone_DUMP: +// CHECK-DUMP-NEXT: FunctionDecl +// CHECK-DUMP-NEXT: CFUnknownTransferAttr +// CHECK-DUMP-NOT: Attr diff --git a/clang/test/APINotes/search-order.m b/clang/test/APINotes/search-order.m new file mode 100644 index 0000000..17e81d5 --- /dev/null +++ b/clang/test/APINotes/search-order.m @@ -0,0 +1,25 @@ +// RUN: rm -rf %t && mkdir -p %t + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -DFROM_FRAMEWORK=1 -verify + +// RUN: %clang_cc1 -fmodules 
-fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %S/Inputs/APINotes -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -DFROM_SEARCH_PATH=1 -verify + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -iapinotes-modules %S/Inputs/APINotes -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -DFROM_FRAMEWORK=1 -verify + +@import SomeOtherKit; + +void test(A *a) { +#if FROM_FRAMEWORK + [a methodA]; // expected-error{{unavailable}} + [a methodB]; + + // expected-note@SomeOtherKit/SomeOtherKit.h:5{{'methodA' has been explicitly marked unavailable here}} +#elif FROM_SEARCH_PATH + [a methodA]; + [a methodB]; // expected-error{{unavailable}} + + // expected-note@SomeOtherKit/SomeOtherKit.h:6{{'methodB' has been explicitly marked unavailable here}} +#else +# error Not something we need to test +#endif +} diff --git a/clang/test/APINotes/swift-import-as.cpp b/clang/test/APINotes/swift-import-as.cpp new file mode 100644 index 0000000..904857e --- /dev/null +++ b/clang/test/APINotes/swift-import-as.cpp @@ -0,0 +1,16 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++ +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter ImmortalRefType | FileCheck -check-prefix=CHECK-IMMORTAL %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter RefCountedType | FileCheck -check-prefix=CHECK-REF-COUNTED %s + +#include <SwiftImportAs.h> + +// CHECK-IMMORTAL: Dumping ImmortalRefType: +// CHECK-IMMORTAL-NEXT: 
CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct ImmortalRefType +// CHECK-IMMORTAL: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference" + +// CHECK-REF-COUNTED: Dumping RefCountedType: +// CHECK-REF-COUNTED-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct RefCountedType +// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference" +// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "retain:RCRetain" +// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:RCRelease" diff --git a/clang/test/APINotes/top-level-private-modules.c b/clang/test/APINotes/top-level-private-modules.c new file mode 100644 index 0000000..0da72b2 --- /dev/null +++ b/clang/test/APINotes/top-level-private-modules.c @@ -0,0 +1,8 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify + +#include <PrivateLib.h> +#include <TopLevelPrivateKit/TopLevelPrivateKit_Private.h> + +void *testPlain = PrivateLib; // expected-error {{initializing 'void *' with an expression of incompatible type 'float'}} +void *testFramework = TopLevelPrivateKit_Private; // expected-error {{initializing 'void *' with an expression of incompatible type 'float'}} diff --git a/clang/test/APINotes/types.m b/clang/test/APINotes/types.m new file mode 100644 index 0000000..133d504 --- /dev/null +++ b/clang/test/APINotes/types.m @@ -0,0 +1,28 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fdisable-module-hash -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify +// RUN: %clang_cc1 -ast-print %t/ModulesCache/SimpleKit.pcm | FileCheck %s + +#import <SomeKit/SomeKit.h> +#import <SimpleKit/SimpleKit.h> + +// CHECK: struct 
__attribute__((swift_name("SuccessfullyRenamedA"))) RenamedAgainInAPINotesA { +// CHECK: struct __attribute__((swift_name("SuccessfullyRenamedB"))) RenamedAgainInAPINotesB { + +void test(OverriddenTypes *overridden) { + int *ip1 = global_int_ptr; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'double (*)(int, int)'}} + + int *ip2 = global_int_fun( // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'char *'}} + ip2, // expected-warning{{incompatible pointer types passing 'int *' to parameter of type 'double *'}} + ip2); // expected-warning{{incompatible pointer types passing 'int *' to parameter of type 'float *'}} + + int *ip3 = [overridden // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'char *'}} + methodToMangle: ip3 // expected-warning{{incompatible pointer types sending 'int *' to parameter of type 'double *'}} + second: ip3]; // expected-warning{{incompatible pointer types sending 'int *' to parameter of type 'float *'}} + + int *ip4 = overridden.intPropertyToMangle; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'double *'}} +} + +// expected-note@SomeKit/SomeKit.h:42{{passing argument to parameter 'ptr' here}} +// expected-note@SomeKit/SomeKit.h:42{{passing argument to parameter 'ptr2' here}} +// expected-note@SomeKit/SomeKit.h:48{{passing argument to parameter 'ptr1' here}} +// expected-note@SomeKit/SomeKit.h:48{{passing argument to parameter 'ptr2' here}} diff --git a/clang/test/APINotes/versioned-multi.c b/clang/test/APINotes/versioned-multi.c new file mode 100644 index 0000000..48c51fd --- /dev/null +++ b/clang/test/APINotes/versioned-multi.c @@ -0,0 +1,69 @@ +// RUN: rm -rf %t && mkdir -p %t + +// Build and check the unversioned module file. 
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Unversioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/Unversioned/VersionedKit.pcm | FileCheck -check-prefix=CHECK-UNVERSIONED %s + +// Build and check the various versions. +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned3 -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=3 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned3/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED-3 %s + +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned4 -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=4 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned4/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED-4 %s + +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned5 -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=5 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned5/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED-5 %s + +#import <VersionedKit/VersionedKit.h> + +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef4; +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef34; +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef34Notes 
__attribute__((swift_name("MultiVersionedTypedef34Notes_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef45; +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef345; +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_NEW"))); +// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_NEW"))); + +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef4 __attribute__((swift_name("MultiVersionedTypedef4_4"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_4"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_4"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef34 __attribute__((swift_name("MultiVersionedTypedef34_3"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_3"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_3"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef45 __attribute__((swift_name("MultiVersionedTypedef45_4"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_4"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_4"))); +// CHECK-VERSIONED-3: typedef int 
MultiVersionedTypedef345 __attribute__((swift_name("MultiVersionedTypedef345_3"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_3"))); +// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_3"))); + +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef4 __attribute__((swift_name("MultiVersionedTypedef4_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef34 __attribute__((swift_name("MultiVersionedTypedef34_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef45 __attribute__((swift_name("MultiVersionedTypedef45_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef345 __attribute__((swift_name("MultiVersionedTypedef345_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_4"))); +// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_4"))); + +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef4; +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef4Notes 
__attribute__((swift_name("MultiVersionedTypedef4Notes_NEW"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_NEW"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef34; +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_NEW"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_NEW"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef45 __attribute__((swift_name("MultiVersionedTypedef45_5"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_5"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_5"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef345 __attribute__((swift_name("MultiVersionedTypedef345_5"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_5"))); +// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_5"))); diff --git a/clang/test/APINotes/versioned.m b/clang/test/APINotes/versioned.m new file mode 100644 index 0000000..61cc8c3 --- /dev/null +++ b/clang/test/APINotes/versioned.m @@ -0,0 +1,187 @@ +// RUN: rm -rf %t && mkdir -p %t + +// Build and check the unversioned module file. 
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Unversioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/Unversioned/VersionedKit.pcm | FileCheck -check-prefix=CHECK-UNVERSIONED %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Unversioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'DUMP' | FileCheck -check-prefix=CHECK-DUMP -check-prefix=CHECK-UNVERSIONED-DUMP %s + +// Build and check the versioned module file. +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=3 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=3 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'DUMP' | FileCheck -check-prefix=CHECK-DUMP -check-prefix=CHECK-VERSIONED-DUMP %s + +#import <VersionedKit/VersionedKit.h> + +// CHECK-UNVERSIONED: void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(x:y:)"))); +// CHECK-VERSIONED: void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(a:b:)"))); + +// CHECK-DUMP-LABEL: Dumping moveToPointDUMP +// CHECK-VERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}} +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "moveTo(x:y:)" +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "moveTo(a:b:)" +// CHECK-UNVERSIONED-DUMP: 
SwiftNameAttr {{.+}} "moveTo(x:y:)" +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}} +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "moveTo(a:b:)" +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping unversionedRenameDUMP +// CHECK-DUMP: in VersionedKit unversionedRenameDUMP +// CHECK-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 0 IsReplacedByActive{{$}} +// CHECK-DUMP-NEXT: SwiftNameAttr {{.+}} "unversionedRename_HEADER()" +// CHECK-DUMP-NEXT: SwiftNameAttr {{.+}} "unversionedRename_NOTES()" +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping TestGenericDUMP +// CHECK-VERSIONED-DUMP: SwiftImportAsNonGenericAttr {{.+}} <<invalid sloc>> +// CHECK-UNVERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}} +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftImportAsNonGenericAttr {{.+}} <<invalid sloc>> +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping Swift3RenamedOnlyDUMP +// CHECK-DUMP: in VersionedKit Swift3RenamedOnlyDUMP +// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 3.0 {{[0-9]+}} IsReplacedByActive{{$}} +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift3Name" +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}} +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "SpecialSwift3Name" +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping Swift3RenamedAlsoDUMP +// CHECK-DUMP: in VersionedKit Swift3RenamedAlsoDUMP +// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}} +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <line:{{.+}}, col:{{.+}}> "Swift4Name" +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift3Also" +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <line:{{.+}}, col:{{.+}}> "Swift4Name" +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}} +// CHECK-UNVERSIONED-DUMP-NEXT: 
SwiftNameAttr {{.+}} <<invalid sloc>> "SpecialSwift3Also" +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-LABEL: Dumping Swift4RenamedDUMP +// CHECK-DUMP: in VersionedKit Swift4RenamedDUMP +// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 4 {{[0-9]+}} IsReplacedByActive{{$}} +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift4Name" +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 4{{$}} +// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "SpecialSwift4Name" +// CHECK-DUMP-NOT: Attr + +// CHECK-DUMP-NOT: Dumping + +// CHECK-UNVERSIONED: void acceptClosure(void (^block)(void) __attribute__((noescape))); +// CHECK-VERSIONED: void acceptClosure(void (^block)(void)); + +// CHECK-UNVERSIONED: void privateFunc(void) __attribute__((swift_private)); + +// CHECK-UNVERSIONED: typedef double MyDoubleWrapper __attribute__((swift_wrapper("struct"))); + +// CHECK-UNVERSIONED: enum __attribute__((ns_error_domain(MyErrorDomain))) MyErrorCode { +// CHECK-UNVERSIONED-NEXT: MyErrorCodeFailed = 1 +// CHECK-UNVERSIONED-NEXT: }; + +// CHECK-UNVERSIONED: __attribute__((swift_bridge("MyValueType"))) +// CHECK-UNVERSIONED: @interface MyReferenceType + +// CHECK-VERSIONED: void privateFunc(void); + +// CHECK-VERSIONED: typedef double MyDoubleWrapper; + +// CHECK-VERSIONED: enum MyErrorCode { +// CHECK-VERSIONED-NEXT: MyErrorCodeFailed = 1 +// CHECK-VERSIONED-NEXT: }; + +// CHECK-VERSIONED-NOT: __attribute__((swift_bridge("MyValueType"))) +// CHECK-VERSIONED: @interface MyReferenceType + +// CHECK-UNVERSIONED: __attribute__((swift_objc_members) +// CHECK-UNVERSIONED-NEXT: @interface TestProperties +// CHECK-VERSIONED-NOT: __attribute__((swift_objc_members) +// CHECK-VERSIONED: @interface TestProperties + +// CHECK-UNVERSIONED-LABEL: enum __attribute__((flag_enum)) FlagEnum { +// CHECK-UNVERSIONED-NEXT: FlagEnumA = 1, +// CHECK-UNVERSIONED-NEXT: FlagEnumB = 2 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: 
enum __attribute__((flag_enum)) NewlyFlagEnum { +// CHECK-UNVERSIONED-NEXT: NewlyFlagEnumA = 1, +// CHECK-UNVERSIONED-NEXT: NewlyFlagEnumB = 2 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((flag_enum)) APINotedFlagEnum { +// CHECK-UNVERSIONED-NEXT: APINotedFlagEnumA = 1, +// CHECK-UNVERSIONED-NEXT: APINotedFlagEnumB = 2 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) OpenEnum { +// CHECK-UNVERSIONED-NEXT: OpenEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) NewlyOpenEnum { +// CHECK-UNVERSIONED-NEXT: NewlyOpenEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) NewlyClosedEnum { +// CHECK-UNVERSIONED-NEXT: NewlyClosedEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) ClosedToOpenEnum { +// CHECK-UNVERSIONED-NEXT: ClosedToOpenEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) OpenToClosedEnum { +// CHECK-UNVERSIONED-NEXT: OpenToClosedEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) APINotedOpenEnum { +// CHECK-UNVERSIONED-NEXT: APINotedOpenEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) APINotedClosedEnum { +// CHECK-UNVERSIONED-NEXT: APINotedClosedEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; + +// CHECK-VERSIONED-LABEL: enum __attribute__((flag_enum)) FlagEnum { +// CHECK-VERSIONED-NEXT: FlagEnumA = 1, +// CHECK-VERSIONED-NEXT: FlagEnumB = 2 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum NewlyFlagEnum { +// CHECK-VERSIONED-NEXT: NewlyFlagEnumA = 1, +// CHECK-VERSIONED-NEXT: NewlyFlagEnumB = 2 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum 
__attribute__((flag_enum)) APINotedFlagEnum { +// CHECK-VERSIONED-NEXT: APINotedFlagEnumA = 1, +// CHECK-VERSIONED-NEXT: APINotedFlagEnumB = 2 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) OpenEnum { +// CHECK-VERSIONED-NEXT: OpenEnumA = 1 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum NewlyOpenEnum { +// CHECK-VERSIONED-NEXT: NewlyOpenEnumA = 1 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum NewlyClosedEnum { +// CHECK-VERSIONED-NEXT: NewlyClosedEnumA = 1 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) ClosedToOpenEnum { +// CHECK-VERSIONED-NEXT: ClosedToOpenEnumA = 1 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) OpenToClosedEnum { +// CHECK-VERSIONED-NEXT: OpenToClosedEnumA = 1 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) APINotedOpenEnum { +// CHECK-VERSIONED-NEXT: APINotedOpenEnumA = 1 +// CHECK-VERSIONED-NEXT: }; +// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) APINotedClosedEnum { +// CHECK-VERSIONED-NEXT: APINotedClosedEnumA = 1 +// CHECK-VERSIONED-NEXT: }; + +// These don't actually have versioned information, so we just check them once. 
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) SoonToBeCFEnum { +// CHECK-UNVERSIONED-NEXT: SoonToBeCFEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) SoonToBeNSEnum { +// CHECK-UNVERSIONED-NEXT: SoonToBeNSEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) __attribute__((flag_enum)) SoonToBeCFOptions { +// CHECK-UNVERSIONED-NEXT: SoonToBeCFOptionsA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) __attribute__((flag_enum)) SoonToBeNSOptions { +// CHECK-UNVERSIONED-NEXT: SoonToBeNSOptionsA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) SoonToBeCFClosedEnum { +// CHECK-UNVERSIONED-NEXT: SoonToBeCFClosedEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) SoonToBeNSClosedEnum { +// CHECK-UNVERSIONED-NEXT: SoonToBeNSClosedEnumA = 1 +// CHECK-UNVERSIONED-NEXT: }; +// CHECK-UNVERSIONED-LABEL: enum UndoAllThatHasBeenDoneToMe { +// CHECK-UNVERSIONED-NEXT: UndoAllThatHasBeenDoneToMeA = 1 +// CHECK-UNVERSIONED-NEXT: }; diff --git a/clang/test/APINotes/yaml-convert-diags.c b/clang/test/APINotes/yaml-convert-diags.c new file mode 100644 index 0000000..1d352dc --- /dev/null +++ b/clang/test/APINotes/yaml-convert-diags.c @@ -0,0 +1,6 @@ +// RUN: rm -rf %t +// RUN: not %clang_cc1 -fsyntax-only -fapinotes %s -I %S/Inputs/BrokenHeaders2 2>&1 | FileCheck %s + +#include "SomeBrokenLib.h" + +// CHECK: error: multiple definitions of global function 'do_something_with_pointers' diff --git a/clang/test/APINotes/yaml-parse-diags.c b/clang/test/APINotes/yaml-parse-diags.c new file mode 100644 index 0000000..3ae39cc --- /dev/null +++ b/clang/test/APINotes/yaml-parse-diags.c @@ -0,0 +1,6 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 
-fsyntax-only -fapinotes %s -I %S/Inputs/BrokenHeaders -verify + +#include "SomeBrokenLib.h" + +// expected-error@APINotes.apinotes:4{{unknown key 'Nu llabilityOfRet'}} diff --git a/clang/test/APINotes/yaml-reader-errors.m b/clang/test/APINotes/yaml-reader-errors.m new file mode 100644 index 0000000..9e5ee34 --- /dev/null +++ b/clang/test/APINotes/yaml-reader-errors.m @@ -0,0 +1,5 @@ +// RUN: rm -rf %t +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fapinotes -fapinotes-modules -fmodules-cache-path=%t -I %S/Inputs/yaml-reader-errors/ -fsyntax-only %s > %t.err 2>&1 +// RUN: FileCheck %S/Inputs/yaml-reader-errors/UIKit.apinotes < %t.err + +@import UIKit; diff --git a/clang/test/CodeGen/CSKY/csky-abi.c b/clang/test/CodeGen/CSKY/csky-abi.c index 2e54937..29ed661 100644 --- a/clang/test/CodeGen/CSKY/csky-abi.c +++ b/clang/test/CodeGen/CSKY/csky-abi.c @@ -185,13 +185,13 @@ void f_va_caller(void) { // CHECK: [[VA:%.*]] = alloca ptr, align 4 // CHECK: [[V:%.*]] = alloca i32, align 4 // CHECK: store ptr %fmt, ptr [[FMT_ADDR]], align 4 -// CHECK: call void @llvm.va_start(ptr [[VA]]) +// CHECK: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK: [[TMP1:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 // CHECK: store i32 [[TMP1]], ptr [[V]], align 4 -// CHECK: call void @llvm.va_end(ptr [[VA]]) +// CHECK: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK: [[TMP2:%.*]] = load i32, ptr [[V]], align 4 // CHECK: ret i32 [[TMP2]] // CHECK: } @@ -210,13 +210,13 @@ int f_va_1(char *fmt, ...) 
{ // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-NEXT: [[V:%.*]] = alloca double, align 4 // CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR]], align 4 // CHECK-NEXT: store double [[TMP4]], ptr [[V]], align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 4 // CHECK-NEXT: ret double [[TMP5]] double f_va_2(char *fmt, ...) { @@ -236,7 +236,7 @@ double f_va_2(char *fmt, ...) { // CHECK-NEXT: [[W:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[X:%.*]] = alloca double, align 4 // CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -252,7 +252,7 @@ double f_va_2(char *fmt, ...) { // CHECK-NEXT: store ptr [[ARGP_NEXT5]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP11:%.*]] = load double, ptr [[ARGP_CUR4]], align 4 // CHECK-NEXT: store double [[TMP11]], ptr [[X]], align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[V]], align 4 // CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 4 // CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] @@ -279,7 +279,7 @@ double f_va_3(char *fmt, ...) 
{ // CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 // CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -302,7 +302,7 @@ double f_va_3(char *fmt, ...) { // CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR8]], i32 16 // CHECK-NEXT: store ptr [[ARGP_NEXT9]], ptr [[VA]], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[ARGP_CUR8]], i32 16, i1 false) -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) int f_va_4(char *fmt, ...) { __builtin_va_list va; diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c index 66b480a..fc7f1ea 100644 --- a/clang/test/CodeGen/LoongArch/abi-lp64d.c +++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c @@ -449,13 +449,13 @@ void f_va_caller(void) { // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8 // CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 
// CHECK-NEXT: ret i32 [[TMP1]] int f_va_int(char *fmt, ...) { diff --git a/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c index 0318242..b3f1e93 100644 --- a/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c +++ b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c @@ -17,7 +17,7 @@ vector double vector_varargs(int count, ...) { } // CHECK: %arg_list = alloca ptr -// CHECK: call void @llvm.va_start(ptr %arg_list) +// CHECK: call void @llvm.va_start.p0(ptr %arg_list) // AIX32: for.body: // AIX32-NEXT: %argp.cur = load ptr, ptr %arg_list, align 4 @@ -41,4 +41,4 @@ vector double vector_varargs(int count, ...) { // CHECK: for.end: -// CHECK: call void @llvm.va_end(ptr %arg_list) +// CHECK: call void @llvm.va_end.p0(ptr %arg_list) diff --git a/clang/test/CodeGen/PowerPC/aix-vaargs.c b/clang/test/CodeGen/PowerPC/aix-vaargs.c index 8b8417d..724ba656 100644 --- a/clang/test/CodeGen/PowerPC/aix-vaargs.c +++ b/clang/test/CodeGen/PowerPC/aix-vaargs.c @@ -35,7 +35,7 @@ void testva (int n, ...) { // CHECK-NEXT: %v = alloca i32, align 4 // CHECK-NEXT: store i32 %n, ptr %n.addr, align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr %ap) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr %ap) // AIX32-NEXT: %argp.cur = load ptr, ptr %ap, align 4 // AIX32-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 16 @@ -48,7 +48,7 @@ void testva (int n, ...) { // AIX32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 %t, ptr align 4 %argp.cur, i32 16, i1 false) // AIX64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %t, ptr align 8 %argp.cur, i64 16, i1 false) -// CHECK-NEXT: call void @llvm.va_copy(ptr %ap2, ptr %ap) +// CHECK-NEXT: call void @llvm.va_copy.p0(ptr %ap2, ptr %ap) // AIX32-NEXT: %argp.cur1 = load ptr, ptr %ap2, align 4 // AIX32-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur1, i32 4 @@ -62,14 +62,14 @@ void testva (int n, ...) 
{ // AIX64-NEXT: %1 = load i32, ptr %0, align 4 // AIX64-NEXT: store i32 %1, ptr %v, align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr %ap2) -// CHECK-NEXT: call void @llvm.va_end(ptr %ap) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr %ap2) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr %ap) // CHECK-NEXT: ret void -// CHECK: declare void @llvm.va_start(ptr) +// CHECK: declare void @llvm.va_start.p0(ptr) // AIX32: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) // AIX64: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) -// CHECK: declare void @llvm.va_copy(ptr, ptr) -// CHECK: declare void @llvm.va_end(ptr) +// CHECK: declare void @llvm.va_copy.p0(ptr, ptr) +// CHECK: declare void @llvm.va_end.p0(ptr) diff --git a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c index 396614f..2f5459d 100644 --- a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c +++ b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c @@ -31,7 +31,7 @@ void foo_ls(ldbl128_s); // OMP-TARGET: call void @foo_ld(ppc_fp128 noundef %[[V3]]) // OMP-HOST-LABEL: define{{.*}} void @omp( -// OMP-HOST: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]]) +// OMP-HOST: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]]) // OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]], align 8 // OMP-HOST: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15 // OMP-HOST: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16) @@ -49,13 +49,13 @@ void omp(int n, ...) 
{ } // IEEE-LABEL: define{{.*}} void @f128 -// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]]) +// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]]) // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]] // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16) // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16 // IEEE: call void @foo_fq(fp128 noundef %[[V4]]) -// IEEE: call void @llvm.va_end(ptr %[[AP]]) +// IEEE: call void @llvm.va_end.p0(ptr %[[AP]]) void f128(int n, ...) { va_list ap; va_start(ap, n); @@ -64,20 +64,20 @@ void f128(int n, ...) { } // IEEE-LABEL: define{{.*}} void @long_double -// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]]) +// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]]) // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]] // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16) // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16 // IEEE: call void @foo_ld(fp128 noundef %[[V4]]) -// IEEE: call void @llvm.va_end(ptr %[[AP]]) +// IEEE: call void @llvm.va_end.p0(ptr %[[AP]]) // IBM-LABEL: define{{.*}} void @long_double -// IBM: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]]) +// IBM: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]]) // IBM: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]] // IBM: %[[V4:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8 // IBM: call void @foo_ld(ppc_fp128 noundef %[[V4]]) -// IBM: call void @llvm.va_end(ptr %[[AP]]) +// IBM: call void @llvm.va_end.p0(ptr %[[AP]]) void long_double(int n, ...) { va_list ap; va_start(ap, n); @@ -86,7 +86,7 @@ void long_double(int n, ...) 
{ } // IEEE-LABEL: define{{.*}} void @long_double_struct -// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]]) +// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]]) // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]] // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16) @@ -96,7 +96,7 @@ void long_double(int n, ...) { // IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, ptr %[[TMP]], i32 0, i32 0 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[COERCE]], align 16 // IEEE: call void @foo_ls(fp128 inreg %[[V4]]) -// IEEE: call void @llvm.va_end(ptr %[[AP]]) +// IEEE: call void @llvm.va_end.p0(ptr %[[AP]]) void long_double_struct(int n, ...) { va_list ap; va_start(ap, n); diff --git a/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c b/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c index 35d6973..b303d71 100644 --- a/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c +++ b/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c @@ -2,6 +2,28 @@ // RUN: not %clang_cc1 -triple riscv64 -target-feature +zifencei -target-feature +m -target-feature +a \ // RUN: -emit-llvm %s 2>&1 | FileCheck %s +#include <riscv_vector.h> + +void test_builtin() { +// CHECK: error: '__builtin_rvv_vsetvli' needs target feature zve32x + __riscv_vsetvl_e8m8(1); +} + +void test_rvv_i32_type() { +// CHECK: error: RISC-V type 'vint32m1_t' (aka '__rvv_int32m1_t') requires the 'zve32x' extension + vint32m1_t v; +} + +void test_rvv_f32_type() { +// CHECK: error: RISC-V type 'vfloat32m1_t' (aka '__rvv_float32m1_t') requires the 'zve32f' extension + vfloat32m1_t v; +} + +void test_rvv_f64_type() { +// CHECK: error: RISC-V type 'vfloat64m1_t' (aka '__rvv_float64m1_t') requires the 'zve64d' extension + vfloat64m1_t v; +} + // CHECK: error: duplicate 'arch=' in the 'target' attribute string; 
__attribute__((target("arch=rv64gc;arch=rv64gc_zbb"))) void testMultiArchSelectLast() {} // CHECK: error: duplicate 'cpu=' in the 'target' attribute string; diff --git a/clang/test/CodeGen/RISCV/riscv-func-attr-target.c b/clang/test/CodeGen/RISCV/riscv-func-attr-target.c index f216eaf..1f86821 100644 --- a/clang/test/CodeGen/RISCV/riscv-func-attr-target.c +++ b/clang/test/CodeGen/RISCV/riscv-func-attr-target.c @@ -4,6 +4,8 @@ // RUN: -target-feature -relax -target-feature -zfa \ // RUN: -emit-llvm %s -o - | FileCheck %s +#include <riscv_vector.h> + // CHECK-LABEL: define dso_local void @testDefault // CHECK-SAME: () #0 { void testDefault() {} @@ -35,6 +37,34 @@ testAttrFullArchAndAttrCpu() {} // CHECK-SAME: () #8 { __attribute__((target("cpu=sifive-u54"))) void testAttrCpuOnly() {} +__attribute__((target("arch=+zve32x"))) +void test_builtin_w_zve32x() { +// CHECK-LABEL: test_builtin_w_zve32x +// CHECK-SAME: #9 + __riscv_vsetvl_e8m8(1); +} + +__attribute__((target("arch=+zve32x"))) +void test_rvv_i32_type_w_zve32x() { +// CHECK-LABEL: test_rvv_i32_type_w_zve32x +// CHECK-SAME: #9 + vint32m1_t v; +} + +__attribute__((target("arch=+zve32f"))) +void test_rvv_f32_type_w_zve32f() { +// CHECK-LABEL: test_rvv_f32_type_w_zve32f +// CHECK-SAME: #11 + vfloat32m1_t v; +} + +__attribute__((target("arch=+zve64d"))) +void test_rvv_f64_type_w_zve64d() { +// CHECK-LABEL: test_rvv_f64_type_w_zve64d +// CHECK-SAME: #12 + vfloat64m1_t v; +} + //. 
// CHECK: attributes #0 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zifencei,-relax,-zbb,-zfa" } // CHECK: attributes #1 = { {{.*}}"target-cpu"="rocket-rv64" "target-features"="+64bit,+a,+d,+f,+m,+save-restore,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-relax,-zbb,-zfa" "tune-cpu"="generic-rv64" } @@ -46,3 +76,6 @@ __attribute__((target("cpu=sifive-u54"))) void testAttrCpuOnly() {} // CHECK: attributes #6 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+m,+save-restore,+zbb,+zifencei,-relax,-zfa" } // CHECK: attributes #7 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+m,+save-restore,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } // CHECK: attributes #8 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+c,+d,+f,+m,+save-restore,+zicsr,+zifencei,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" } +// CHECK: attributes #9 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zicsr,+zifencei,+zve32x,+zvl32b,-relax,-zbb,-zfa" } +// CHECK: attributes #11 = { {{.*}}"target-features"="+64bit,+a,+f,+m,+save-restore,+zicsr,+zifencei,+zve32f,+zve32x,+zvl32b,-relax,-zbb,-zfa" } +// CHECK: attributes #12 = { {{.*}}"target-features"="+64bit,+a,+d,+f,+m,+save-restore,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl32b,+zvl64b,-relax,-zbb,-zfa" } diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c new file mode 100644 index 0000000..072d8a8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c @@ -0,0 +1,34 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s +// RUN: %clang_cc1 -std=c23 -triple riscv64 -target-feature +v \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s + +#include <riscv_vector.h> + +// CHECK-LLVM: call riscv_vector_cc 
<vscale x 2 x i32> @bar +vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input); +vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t val = __riscv_vle32_v_i32m1(base, vl); + vint32m1_t ret = bar(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} + +// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @bar +[[riscv::vector_cc]] vint32m1_t bar(vint32m1_t input); +vint32m1_t test_vector_cc_attr2(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t val = __riscv_vle32_v_i32m1(base, vl); + vint32m1_t ret = bar(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} + +// CHECK-LLVM: call <vscale x 2 x i32> @baz +vint32m1_t baz(vint32m1_t input); +vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t val = __riscv_vle32_v_i32m1(base, vl); + vint32m1_t ret = baz(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp new file mode 100644 index 0000000..c01aeb2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp @@ -0,0 +1,32 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -std=c++11 -triple riscv64 -target-feature +v \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s + +#include <riscv_vector.h> + +// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @_Z3baru15__rvv_int32m1_t +vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input); +vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t val = __riscv_vle32_v_i32m1(base, vl); + vint32m1_t ret = bar(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} + +// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @_Z3baru15__rvv_int32m1_t +[[riscv::vector_cc]] vint32m1_t bar(vint32m1_t input); +vint32m1_t test_vector_cc_attr2(vint32m1_t input, 
int32_t *base, size_t vl) { + vint32m1_t val = __riscv_vle32_v_i32m1(base, vl); + vint32m1_t ret = bar(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} + +// CHECK-LLVM: call <vscale x 2 x i32> @_Z3bazu15__rvv_int32m1_t +vint32m1_t baz(vint32m1_t input); +vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t val = __riscv_vle32_v_i32m1(base, vl); + vint32m1_t ret = baz(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.c new file mode 100644 index 0000000..5c35901 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -std=c23 -triple riscv64 -target-feature +v -verify + +__attribute__((riscv_vector_cc)) int var; // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'int'}} + +__attribute__((riscv_vector_cc)) void func(); +__attribute__((riscv_vector_cc(1))) void func_invalid(); // expected-error {{'riscv_vector_cc' attribute takes no arguments}} + +void test_no_attribute(int); // expected-note {{previous declaration is here}} +void __attribute__((riscv_vector_cc)) test_no_attribute(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}} + +[[riscv::vector_cc]] int var2; // expected-warning {{'vector_cc' only applies to function types; type here is 'int'}} + +[[riscv::vector_cc]] void func2(); +[[riscv::vector_cc(1)]] void func_invalid2(); // expected-error {{'vector_cc' attribute takes no arguments}} + +void test_no_attribute2(int); // expected-note {{previous declaration is here}} +[[riscv::vector_cc]] void test_no_attribute2(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}} diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp 
b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp new file mode 100644 index 0000000..264bb7d --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 %s -triple riscv64 -target-feature +v -verify + +__attribute__((riscv_vector_cc)) int var; // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'int'}} + +__attribute__((riscv_vector_cc)) void func(); +__attribute__((riscv_vector_cc(1))) void func_invalid(); // expected-error {{'riscv_vector_cc' attribute takes no arguments}} + +void test_no_attribute(int); // expected-note {{previous declaration is here}} +void __attribute__((riscv_vector_cc)) test_no_attribute(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}} + +class test_cc { + __attribute__((riscv_vector_cc)) void member_func(); +}; + +void test_lambda() { + __attribute__((riscv_vector_cc)) auto lambda = []() { // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'auto'}} + }; +} + +[[riscv::vector_cc]] int var2; // expected-warning {{'vector_cc' only applies to function types; type here is 'int'}} + +[[riscv::vector_cc]] void func2(); +[[riscv::vector_cc(1)]] void func_invalid2(); // expected-error {{'vector_cc' attribute takes no arguments}} + +void test_no_attribute2(int); // expected-note {{previous declaration is here}} +[[riscv::vector_cc]] void test_no_attribute2(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}} + +class test_cc2 { + [[riscv::vector_cc]] void member_func(); +}; + +void test_lambda2() { + [[riscv::vector_cc]] auto lambda = []() { // expected-warning {{'vector_cc' only applies to function types; type here is 'auto'}} + }; +} diff --git a/clang/test/CodeGen/RISCV/riscv32-vararg.c b/clang/test/CodeGen/RISCV/riscv32-vararg.c index 1c4e41f2..00e04eb 100644 --- 
a/clang/test/CodeGen/RISCV/riscv32-vararg.c +++ b/clang/test/CodeGen/RISCV/riscv32-vararg.c @@ -80,13 +80,13 @@ void f_va_caller(void) { // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 // CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -111,7 +111,7 @@ int f_va_1(char *fmt, ...) { // CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8 // CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 // CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) @@ -119,7 +119,7 @@ int f_va_1(char *fmt, ...) 
{ // CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 // CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8 -// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 // CHECK-ILP32F-NEXT: ret double [[TMP2]] // @@ -130,7 +130,7 @@ int f_va_1(char *fmt, ...) { // CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8 // CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 // CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) @@ -138,7 +138,7 @@ int f_va_1(char *fmt, ...) { // CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 // CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8 -// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 // CHECK-ILP32D-NEXT: ret double [[TMP2]] // @@ -149,13 +149,13 @@ int f_va_1(char *fmt, ...) 
{ // CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8 // CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 // CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4 // CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8 -// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load double, ptr [[V]], align 8 // CHECK-ILP32E-NEXT: ret double [[TMP1]] // @@ -180,7 +180,7 @@ double f_va_2(char *fmt, ...) { // CHECK-ILP32F-NEXT: [[W:%.*]] = alloca i32, align 4 // CHECK-ILP32F-NEXT: [[X:%.*]] = alloca double, align 8 // CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 // CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) @@ -200,7 +200,7 @@ double f_va_2(char *fmt, ...) 
{ // CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 // CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 // CHECK-ILP32F-NEXT: store double [[TMP4]], ptr [[X]], align 8 -// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 // CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 // CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] @@ -215,7 +215,7 @@ double f_va_2(char *fmt, ...) { // CHECK-ILP32D-NEXT: [[W:%.*]] = alloca i32, align 4 // CHECK-ILP32D-NEXT: [[X:%.*]] = alloca double, align 8 // CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 // CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) @@ -235,7 +235,7 @@ double f_va_2(char *fmt, ...) { // CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 // CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 // CHECK-ILP32D-NEXT: store double [[TMP4]], ptr [[X]], align 8 -// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 // CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 // CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] @@ -250,7 +250,7 @@ double f_va_2(char *fmt, ...) 
{ // CHECK-ILP32E-NEXT: [[W:%.*]] = alloca i32, align 4 // CHECK-ILP32E-NEXT: [[X:%.*]] = alloca double, align 8 // CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 // CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -266,7 +266,7 @@ double f_va_2(char *fmt, ...) { // CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 // CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load double, ptr [[ARGP_CUR3]], align 4 // CHECK-ILP32E-NEXT: store double [[TMP2]], ptr [[X]], align 8 -// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load double, ptr [[V]], align 8 // CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load double, ptr [[X]], align 8 // CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], [[TMP4]] @@ -296,7 +296,7 @@ double f_va_3(char *fmt, ...) { // CHECK-ILP32F-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 // CHECK-ILP32F-NEXT: [[RET:%.*]] = alloca i32, align 4 // CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -321,7 +321,7 @@ double f_va_3(char *fmt, ...) 
{ // CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 // CHECK-ILP32F-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 // CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) -// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 // CHECK-ILP32F-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 // CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 @@ -384,7 +384,7 @@ double f_va_3(char *fmt, ...) { // CHECK-ILP32D-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 // CHECK-ILP32D-NEXT: [[RET:%.*]] = alloca i32, align 4 // CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -409,7 +409,7 @@ double f_va_3(char *fmt, ...) { // CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 // CHECK-ILP32D-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 // CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) -// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 // CHECK-ILP32D-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 // CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 @@ -472,7 +472,7 @@ double f_va_3(char *fmt, ...) 
{ // CHECK-ILP32E-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 // CHECK-ILP32E-NEXT: [[RET:%.*]] = alloca i32, align 4 // CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -497,7 +497,7 @@ double f_va_3(char *fmt, ...) { // CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 // CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 // CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) -// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 // CHECK-ILP32E-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 // CHECK-ILP32E-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 diff --git a/clang/test/CodeGen/RISCV/riscv64-vararg.c b/clang/test/CodeGen/RISCV/riscv64-vararg.c index 634cde6..efdffa2 100644 --- a/clang/test/CodeGen/RISCV/riscv64-vararg.c +++ b/clang/test/CodeGen/RISCV/riscv64-vararg.c @@ -135,13 +135,13 @@ void f_va_caller(void) { // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8 // 
CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -166,7 +166,7 @@ int f_va_1(char *fmt, ...) { // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[V:%.*]] = alloca fp128, align 16 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15 // CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[TMP0]], i64 -16) @@ -174,7 +174,7 @@ int f_va_1(char *fmt, ...) { // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr [[ARGP_CUR_ALIGNED]], align 16 // CHECK-NEXT: store fp128 [[TMP1]], ptr [[V]], align 16 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr [[V]], align 16 // CHECK-NEXT: ret fp128 [[TMP2]] // @@ -199,7 +199,7 @@ long double f_va_2(char *fmt, ...) { // CHECK-NEXT: [[W:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[X:%.*]] = alloca fp128, align 16 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15 // CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[TMP0]], i64 -16) @@ -219,7 +219,7 @@ long double f_va_2(char *fmt, ...) 
{ // CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load fp128, ptr [[ARGP_CUR3_ALIGNED]], align 16 // CHECK-NEXT: store fp128 [[TMP4]], ptr [[X]], align 16 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP5:%.*]] = load fp128, ptr [[V]], align 16 // CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr [[X]], align 16 // CHECK-NEXT: [[ADD:%.*]] = fadd fp128 [[TMP5]], [[TMP6]] @@ -248,7 +248,7 @@ long double f_va_3(char *fmt, ...) { // CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 8 // CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8 @@ -267,7 +267,7 @@ long double f_va_3(char *fmt, ...) 
{ // CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR5]], align 8 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[LS]], ptr align 8 [[TMP1]], i64 32, i1 false) -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 // CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2 // CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP2]] to i64 diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c index ecf090a..bad6850 100644 --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple riscv32 -target-feature +zbb -verify %s -o - +// RUN: %clang_cc1 -triple riscv32 -target-feature +zbb -S -verify %s -o - unsigned int orc_b_64(unsigned int a) { - return __builtin_riscv_orc_b_64(a); // expected-error {{builtin requires: 'RV64'}} + return __builtin_riscv_orc_b_64(a); // expected-error {{'__builtin_riscv_orc_b_64' needs target feature zbb,64bit}} } diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c index d2e3e76..a256bf7 100644 --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c @@ -1,14 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple riscv64 -target-feature +zbkb -verify %s -o - +// RUN: %clang_cc1 -triple riscv64 -target-feature +zbkb -S -verify %s -o - #include <stdint.h> -uint32_t zip(uint32_t rs1) +uint32_t zip_unzip(uint32_t rs1) { - return 
__builtin_riscv_zip_32(rs1); // expected-error {{builtin requires: 'RV32'}} -} - -uint32_t unzip(uint32_t rs1) -{ - return __builtin_riscv_unzip_32(rs1); // expected-error {{builtin requires: 'RV32'}} + (void)__builtin_riscv_zip_32(rs1); // expected-error {{'__builtin_riscv_zip_32' needs target feature zbkb,32bit}} + return __builtin_riscv_unzip_32(rs1); // expected-error {{'__builtin_riscv_unzip_32' needs target feature zbkb,32bit}} } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c index 6ec9b05..ecb6c5f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c @@ -11,7 +11,7 @@ // CHECK-RV64V-NEXT: ret i32 [[CONV]] // -// CHECK-RV64-ERR: error: builtin requires at least one of the following extensions: 'Zve32x' +// CHECK-RV64-ERR: error: '__builtin_rvv_vsetvli' needs target feature zve32x int test() { return __builtin_rvv_vsetvli(1, 0, 0); diff --git a/clang/test/CodeGen/WebAssembly/wasm-varargs.c b/clang/test/CodeGen/WebAssembly/wasm-varargs.c index c475de1..e794857 100644 --- a/clang/test/CodeGen/WebAssembly/wasm-varargs.c +++ b/clang/test/CodeGen/WebAssembly/wasm-varargs.c @@ -10,13 +10,13 @@ // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 // CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) 
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -38,7 +38,7 @@ int test_i32(char *fmt, ...) { // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-NEXT: [[V:%.*]] = alloca i64, align 8 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 // CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) @@ -46,7 +46,7 @@ int test_i32(char *fmt, ...) { // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARGP_CUR_ALIGNED]], align 8 // CHECK-NEXT: store i64 [[TMP1]], ptr [[V]], align 8 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[V]], align 8 // CHECK-NEXT: ret i64 [[TMP2]] // @@ -73,13 +73,13 @@ struct S { // CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT]], ptr align 4 [[TMP0]], i32 12, i1 false) -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: ret void // struct S test_struct(char *fmt, ...) 
{ @@ -102,7 +102,7 @@ struct Z {}; // CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 // CHECK-NEXT: [[U:%.*]] = alloca [[STRUCT_Z:%.*]], align 1 // CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]) // CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 @@ -112,7 +112,7 @@ struct Z {}; // CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT]], ptr align 4 [[TMP0]], i32 12, i1 false) -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]]) // CHECK-NEXT: ret void // struct S test_empty_struct(char *fmt, ...) { diff --git a/clang/test/CodeGen/X86/va-arg-sse.c b/clang/test/CodeGen/X86/va-arg-sse.c index e040b0e..b7d00da 100644 --- a/clang/test/CodeGen/X86/va-arg-sse.c +++ b/clang/test/CodeGen/X86/va-arg-sse.c @@ -21,7 +21,7 @@ struct S a[5]; // CHECK-NEXT: store i32 0, ptr [[J]], align 4 // CHECK-NEXT: store i32 0, ptr [[K]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) // CHECK-NEXT: store ptr getelementptr inbounds ([5 x %struct.S], ptr @a, i64 0, i64 2), ptr [[P]], align 8 // CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 // CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY2]], i32 0, i32 1 @@ -52,7 +52,7 @@ struct S a[5]; // CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ 
[[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARG]], ptr align 4 [[VAARG_ADDR]], i64 12, i1 false) // CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_end(ptr [[ARRAYDECAY3]]) +// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[ARRAYDECAY3]]) // CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[P]], align 8 // CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP15]], null // CHECK-NEXT: br i1 [[TOBOOL]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]] diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c index a18ba83..07c6df1 100644 --- a/clang/test/CodeGen/X86/x86_64-vaarg.c +++ b/clang/test/CodeGen/X86/x86_64-vaarg.c @@ -13,7 +13,7 @@ typedef struct { struct {} a; } empty; // CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1 // CHECK-NEXT: store i32 [[Z]], ptr [[Z_ADDR]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 0, i1 false) // CHECK-NEXT: ret void @@ -37,7 +37,7 @@ typedef struct { // CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 // CHECK-NEXT: store i32 [[Z]], ptr [[Z_ADDR]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 // 
CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 // CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c index 2b029f6..13c68fe 100644 --- a/clang/test/CodeGen/aarch64-ABI-align-packed.c +++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c @@ -73,7 +73,7 @@ __attribute__((noinline)) void named_arg_non_packed_struct(double d0, double d1, // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6:[0-9]+]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_non_packed_struct(double d0, double d1, double d2, double d3, @@ -128,7 +128,7 @@ __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, dou // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_packed_struct(double d0, double d1, double d2, double d3, @@ -183,7 +183,7 @@ __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, dou // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void 
@llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_packed_member(double d0, double d1, double d2, double d3, @@ -238,7 +238,7 @@ __attribute__((noinline)) void named_arg_aligned_struct_8(double d0, double d1, // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_aligned_struct_8(double d0, double d1, double d2, double d3, @@ -293,7 +293,7 @@ __attribute__((noinline)) void named_arg_aligned_member_8(double d0, double d1, // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_aligned_member_8(double d0, double d1, double d2, double d3, @@ -348,7 +348,7 @@ __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, doubl // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_pragma_packed_struct_8(double d0, double d1, double d2, double d3, @@ 
-403,7 +403,7 @@ __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, doubl // CHECK-NEXT: entry: // CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]] // CHECK-NEXT: ret void void variadic_pragma_packed_struct_4(double d0, double d1, double d2, double d3, diff --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c index 44b8702..ee4e88e 100644 --- a/clang/test/CodeGen/aarch64-varargs.c +++ b/clang/test/CodeGen/aarch64-varargs.c @@ -837,7 +837,7 @@ void check_start(int n, ...) { va_list the_list; va_start(the_list, n); // CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list -// CHECK: call void @llvm.va_start(ptr [[THE_LIST]]) +// CHECK: call void @llvm.va_start.p0(ptr [[THE_LIST]]) } typedef struct {} empty; diff --git a/clang/test/CodeGen/arm-varargs.c b/clang/test/CodeGen/arm-varargs.c index f754c7f..ab4ac46 100644 --- a/clang/test/CodeGen/arm-varargs.c +++ b/clang/test/CodeGen/arm-varargs.c @@ -264,5 +264,5 @@ void check_start(int n, ...) 
{ va_list the_list; va_start(the_list, n); // CHECK: [[THE_LIST:%[a-z0-9._]+]] = alloca %struct.__va_list -// CHECK: call void @llvm.va_start(ptr [[THE_LIST]]) +// CHECK: call void @llvm.va_start.p0(ptr [[THE_LIST]]) } diff --git a/clang/test/CodeGen/hexagon-linux-vararg.c b/clang/test/CodeGen/hexagon-linux-vararg.c index 033e72a..84945e8 100644 --- a/clang/test/CodeGen/hexagon-linux-vararg.c +++ b/clang/test/CodeGen/hexagon-linux-vararg.c @@ -9,7 +9,7 @@ struct AAA { int d; }; -// CHECK: call void @llvm.va_start(ptr %arraydecay) +// CHECK: call void @llvm.va_start.p0(ptr %arraydecay) // CHECK: %arraydecay1 = getelementptr inbounds [1 x %struct.__va_list_tag], // ptr %ap, i32 0, i32 0 // CHECK: br label %vaarg.maybe_reg diff --git a/clang/test/CodeGen/mips-varargs.c b/clang/test/CodeGen/mips-varargs.c index 052aedd..029f000 100644 --- a/clang/test/CodeGen/mips-varargs.c +++ b/clang/test/CodeGen/mips-varargs.c @@ -29,7 +29,7 @@ int test_i32(char *fmt, ...) { // ALL: [[V:%.*]] = alloca i32, align 4 // NEW: [[PROMOTION_TEMP:%.*]] = alloca i32, align 4 // -// ALL: call void @llvm.va_start(ptr %va) +// ALL: call void @llvm.va_start.p0(ptr %va) // ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]] // O32: [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T:i32]] [[$CHUNKSIZE:4]] // NEW: [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T:i32|i64]] [[$CHUNKSIZE:8]] @@ -45,7 +45,7 @@ int test_i32(char *fmt, ...) { // NEW: [[ARG:%.+]] = load i32, ptr [[PROMOTION_TEMP]], align 4 // ALL: store i32 [[ARG]], ptr [[V]], align 4 // -// ALL: call void @llvm.va_end(ptr %va) +// ALL: call void @llvm.va_end.p0(ptr %va) // ALL: } long long test_i64(char *fmt, ...) { @@ -61,7 +61,7 @@ long long test_i64(char *fmt, ...) { // ALL-LABEL: define{{.*}} i64 @test_i64(ptr{{.*}} %fmt, ...) 
// // ALL: %va = alloca ptr, align [[$PTRALIGN]] -// ALL: call void @llvm.va_start(ptr %va) +// ALL: call void @llvm.va_start.p0(ptr %va) // ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]] // // i64 is 8-byte aligned, while this is within O32's stack alignment there's no @@ -74,7 +74,7 @@ long long test_i64(char *fmt, ...) { // // ALL: [[ARG:%.+]] = load i64, ptr [[AP_CUR]], align 8 // -// ALL: call void @llvm.va_end(ptr %va) +// ALL: call void @llvm.va_end.p0(ptr %va) // ALL: } char *test_ptr(char *fmt, ...) { @@ -92,7 +92,7 @@ char *test_ptr(char *fmt, ...) { // ALL: %va = alloca ptr, align [[$PTRALIGN]] // ALL: [[V:%.*]] = alloca ptr, align [[$PTRALIGN]] // N32: [[AP_CAST:%.+]] = alloca ptr, align 4 -// ALL: call void @llvm.va_start(ptr %va) +// ALL: call void @llvm.va_start.p0(ptr %va) // ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]] // ALL: [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T]] [[$CHUNKSIZE]] // ALL: store ptr [[AP_NEXT]], ptr %va, align [[$PTRALIGN]] @@ -109,7 +109,7 @@ char *test_ptr(char *fmt, ...) { // N64: [[ARG:%.+]] = load ptr, ptr [[AP_CUR]], align [[$PTRALIGN]] // ALL: store ptr [[ARG]], ptr [[V]], align [[$PTRALIGN]] // -// ALL: call void @llvm.va_end(ptr %va) +// ALL: call void @llvm.va_end.p0(ptr %va) // ALL: } int test_v4i32(char *fmt, ...) { @@ -128,7 +128,7 @@ int test_v4i32(char *fmt, ...) { // // ALL: %va = alloca ptr, align [[$PTRALIGN]] // ALL: [[V:%.+]] = alloca <4 x i32>, align 16 -// ALL: call void @llvm.va_start(ptr %va) +// ALL: call void @llvm.va_start.p0(ptr %va) // ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]] // // Vectors are 16-byte aligned, however the O32 ABI has a maximum alignment of @@ -152,7 +152,7 @@ int test_v4i32(char *fmt, ...) 
{ // N32: [[ARG:%.+]] = load <4 x i32>, ptr [[AP_CUR]], align 16 // ALL: store <4 x i32> [[ARG]], ptr [[V]], align 16 // -// ALL: call void @llvm.va_end(ptr %va) +// ALL: call void @llvm.va_end.p0(ptr %va) // ALL: [[VECEXT:%.+]] = extractelement <4 x i32> {{.*}}, i32 0 // ALL: ret i32 [[VECEXT]] // ALL: } diff --git a/clang/test/CodeGen/pr53127.cpp b/clang/test/CodeGen/pr53127.cpp index 97fe129..5a52b48 100644 --- a/clang/test/CodeGen/pr53127.cpp +++ b/clang/test/CodeGen/pr53127.cpp @@ -34,7 +34,7 @@ void operator delete(void*); // CHECK-NEXT: br i1 [[CALL6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] // CHECK: cond.true7: // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) // CHECK-NEXT: br label [[COND_END9:%.*]] // CHECK: cond.false8: // CHECK-NEXT: br label [[COND_END9]] @@ -44,7 +44,7 @@ void operator delete(void*); // CHECK: cond.true11: // CHECK-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L]], i64 0, i64 0 // CHECK-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L2]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_copy(ptr [[ARRAYDECAY12]], ptr [[ARRAYDECAY13]]) +// CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[ARRAYDECAY12]], ptr [[ARRAYDECAY13]]) // CHECK-NEXT: br label [[COND_END15:%.*]] // CHECK: cond.false14: // CHECK-NEXT: br label [[COND_END15]] diff --git a/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c b/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c new file mode 100644 index 0000000..b087da34 --- /dev/null +++ b/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c @@ -0,0 +1,46 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple spirv64-unknown-unknown 
-fcuda-is-device -emit-llvm -o - %s | FileCheck %s + +struct x { + double b; + long a; +}; + +// CHECK-LABEL: define spir_func void @testva( +// CHECK-SAME: i32 noundef [[N:%.*]], ...) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AP:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_X:%.*]], align 8 +// CHECK-NEXT: [[AP2:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VARET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr [[N_ADDR]] to ptr addrspace(4) +// CHECK-NEXT: [[AP_ASCAST:%.*]] = addrspacecast ptr [[AP]] to ptr addrspace(4) +// CHECK-NEXT: [[T_ASCAST:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4) +// CHECK-NEXT: [[AP2_ASCAST:%.*]] = addrspacecast ptr [[AP2]] to ptr addrspace(4) +// CHECK-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr [[V]] to ptr addrspace(4) +// CHECK-NEXT: [[VARET_ASCAST:%.*]] = addrspacecast ptr [[VARET]] to ptr addrspace(4) +// CHECK-NEXT: store i32 [[N]], ptr addrspace(4) [[N_ADDR_ASCAST]], align 4 +// CHECK-NEXT: call void @llvm.va_start.p4(ptr addrspace(4) [[AP_ASCAST]]) +// CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr addrspace(4) [[AP_ASCAST]], ptr +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 8 [[T_ASCAST]], ptr align 8 [[TMP0]], i64 16, i1 false) +// CHECK-NEXT: call void @llvm.va_copy.p4(ptr addrspace(4) [[AP2_ASCAST]], ptr addrspace(4) [[AP_ASCAST]]) +// CHECK-NEXT: [[TMP1:%.*]] = va_arg ptr addrspace(4) [[AP2_ASCAST]], i32 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[VARET_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VARET_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[V_ASCAST]], align 4 +// CHECK-NEXT: call void @llvm.va_end.p4(ptr addrspace(4) [[AP2_ASCAST]]) +// CHECK-NEXT: call void @llvm.va_end.p4(ptr addrspace(4) [[AP_ASCAST]]) +// 
CHECK-NEXT: ret void + +void testva(int n, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, n); + struct x t = __builtin_va_arg(ap, struct x); + __builtin_va_list ap2; + __builtin_va_copy(ap2, ap); + int v = __builtin_va_arg(ap2, int); + __builtin_va_end(ap2); + __builtin_va_end(ap); +} diff --git a/clang/test/CodeGen/xcore-abi.c b/clang/test/CodeGen/xcore-abi.c index 4dd0f22..bb8d2fe 100644 --- a/clang/test/CodeGen/xcore-abi.c +++ b/clang/test/CodeGen/xcore-abi.c @@ -28,7 +28,7 @@ void testva (int n, ...) { // CHECK: [[AP:%[a-z0-9]+]] = alloca ptr, align 4 // CHECK: [[V5:%[a-z0-9]+]] = alloca %struct.x, align 4 // CHECK: [[TMP:%[a-z0-9]+]] = alloca [4 x i32], align 4 - // CHECK: call void @llvm.va_start(ptr [[AP]]) + // CHECK: call void @llvm.va_start.p0(ptr [[AP]]) char* v1 = va_arg (ap, char*); f(v1); diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp index 5a4270a..a1d17c8 100644 --- a/clang/test/CodeGenCXX/ext-int.cpp +++ b/clang/test/CodeGenCXX/ext-int.cpp @@ -159,9 +159,9 @@ void TakesVarargs(int i, ...) { // WIN: %[[ARGS:.+]] = alloca ptr __builtin_va_start(args, i); // LIN64: %[[STARTAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]] - // LIN64: call void @llvm.va_start(ptr %[[STARTAD]]) - // LIN32: call void @llvm.va_start(ptr %[[ARGS]]) - // WIN: call void @llvm.va_start(ptr %[[ARGS]]) + // LIN64: call void @llvm.va_start.p0(ptr %[[STARTAD]]) + // LIN32: call void @llvm.va_start.p0(ptr %[[ARGS]]) + // WIN: call void @llvm.va_start.p0(ptr %[[ARGS]]) _BitInt(92) A = __builtin_va_arg(args, _BitInt(92)); // LIN64: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]] @@ -302,9 +302,9 @@ void TakesVarargs(int i, ...) 
{ __builtin_va_end(args); // LIN64: %[[ENDAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]] - // LIN64: call void @llvm.va_end(ptr %[[ENDAD]]) - // LIN32: call void @llvm.va_end(ptr %[[ARGS]]) - // WIN: call void @llvm.va_end(ptr %[[ARGS]]) + // LIN64: call void @llvm.va_end.p0(ptr %[[ENDAD]]) + // LIN32: call void @llvm.va_end.p0(ptr %[[ARGS]]) + // WIN: call void @llvm.va_end.p0(ptr %[[ARGS]]) } void typeid_tests() { // LIN: define{{.*}} void @_Z12typeid_testsv() diff --git a/clang/test/CodeGenCXX/ibm128-declarations.cpp b/clang/test/CodeGenCXX/ibm128-declarations.cpp index 5ee4f35..e0187e2 100644 --- a/clang/test/CodeGenCXX/ibm128-declarations.cpp +++ b/clang/test/CodeGenCXX/ibm128-declarations.cpp @@ -107,13 +107,13 @@ int main(void) { // CHECK: define dso_local noundef ppc_fp128 @_Z10func_vaargiz(i32 noundef signext %n, ...) // CHECK: entry: // CHECK: store i32 %n, ptr %n.addr, align 4 -// CHECK: call void @llvm.va_start(ptr %ap) +// CHECK: call void @llvm.va_start.p0(ptr %ap) // CHECK: %argp.cur = load ptr, ptr %ap, align 8 // CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 16 // CHECK: store ptr %argp.next, ptr %ap, align 8 // CHECK: %0 = load ppc_fp128, ptr %argp.cur, align 8 // CHECK: store ppc_fp128 %0, ptr %r, align 16 -// CHECK: call void @llvm.va_end(ptr %ap) +// CHECK: call void @llvm.va_end.p0(ptr %ap) // CHECK: %1 = load ppc_fp128, ptr %r, align 16 // CHECK: ret ppc_fp128 %1 // CHECK: } diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp index d221c18..985a0cc 100644 --- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp +++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp @@ -11,7 +11,7 @@ typedef struct { struct {} a; } empty; // CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1 // CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 -// CHECK-NEXT: call 
void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 1, i1 false) // CHECK-NEXT: ret void @@ -34,7 +34,7 @@ typedef struct { // CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 // CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 // CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 // CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 diff --git a/clang/test/Driver/aarch64-sve.c b/clang/test/Driver/aarch64-sve.c index f34b270..4a33c2e 100644 --- a/clang/test/Driver/aarch64-sve.c +++ b/clang/test/Driver/aarch64-sve.c @@ -6,12 +6,11 @@ // RUN: %clang --target=aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s // GENERICV8A-NOSVE-NOT: "-target-feature" "+sve" -// The 32-bit floating point matrix multiply extension is enabled by default -// for armv8.6-a targets (or later) with SVE, and can optionally be enabled for -// any target from armv8.2a onwards (we don't enforce not using it with earlier -// targets). +// The 32-bit floating point matrix multiply extension is an optional feature +// that can be used for any target from armv8.2a and onwards. This can be +// enabled using the `+f32mm` option.`. 
// RUN: %clang --target=aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F32MM %s -// RUN: %clang --target=aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s +// RUN: %clang --target=aarch64 -march=armv8.6a+sve+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s // RUN: %clang --target=aarch64 -march=armv8.5a+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s // NO-F32MM-NOT: "-target-feature" "+f32mm" // F32MM: "-target-feature" "+f32mm" diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h new file mode 100644 index 0000000..993d5d4 --- /dev/null +++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h @@ -0,0 +1,3 @@ +#ifndef PUBLIC_UMBRELLA_HEADER_FIRST +#error "Public umbrella header was not included first!" +#endif diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h new file mode 100644 index 0000000..2599ff1 --- /dev/null +++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h @@ -0,0 +1 @@ +#define PUBLIC_UMBRELLA_HEADER_FIRST diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h new file mode 100644 index 0000000..557209b --- /dev/null +++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h @@ -0,0 +1,3 @@ +#ifndef PRIVATE_UMBRELLA_HEADER_FIRST +#error "Private umbrella header was not included first!" 
+#endif diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h new file mode 100644 index 0000000..fd5b49b --- /dev/null +++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h @@ -0,0 +1 @@ +#define PRIVATE_UMBRELLA_HEADER_FIRST diff --git a/clang/test/InstallAPI/umbrella-headers-unix.test b/clang/test/InstallAPI/umbrella-headers-unix.test new file mode 100644 index 0000000..4611877 --- /dev/null +++ b/clang/test/InstallAPI/umbrella-headers-unix.test @@ -0,0 +1,40 @@ +// UNSUPPORTED: system-windows + +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json +; RUN: mkdir %t/Frameworks/ +; RUN: cp -r %S/Inputs/Umbrella/Umbrella.framework %t/Frameworks/ + +// Only validate path based input that rely on regex matching on unix based file systems. 
+; RUN: clang-installapi --target=arm64-apple-macosx13 \ +; RUN: -install_name /System/Library/Frameworks/Umbrella2.framework/Versions/A/Umbrella \ +; RUN: -ObjC -F%t/Frameworks/ %t/inputs.json \ +; RUN: --public-umbrella-header=%t/Frameworks/Umbrella.framework/Headers/SpecialUmbrella.h \ +; RUN: -private-umbrella-header \ +; RUN: %t/Frameworks/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h \ +; RUN: -o %t/output.tbd 2>&1 | FileCheck -allow-empty %s + +; CHECK-NOT: error +; CHECK-NOT: warning + +;--- inputs.json.in +{ + "headers": [ { + "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/AAA.h", + "type" : "public" + }, + { + "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/SpecialUmbrella.h", + "type" : "public" + }, + { + "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/AAA_Private.h", + "type" : "private" + }, + { + "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h", + "type" : "private" + }], + "version": "3" +} diff --git a/clang/test/InstallAPI/umbrella-headers.test b/clang/test/InstallAPI/umbrella-headers.test new file mode 100644 index 0000000..ce9c506 --- /dev/null +++ b/clang/test/InstallAPI/umbrella-headers.test @@ -0,0 +1,48 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json +; RUN: cp -r %S/Inputs/Umbrella/Umbrella.framework %t/Frameworks/ + +// Check base filename matches. +; RUN: clang-installapi --target=arm64-apple-macosx13 \ +; RUN: -install_name /System/Library/Frameworks/Umbrella.framework/Versions/A/Umbrella \ +; RUN: -ObjC -F%t/Frameworks/ %t/inputs.json \ +; RUN: --public-umbrella-header=SpecialUmbrella.h \ +; RUN: --private-umbrella-header=SpecialPrivateUmbrella.h \ +; RUN: -o %t/output.tbd 2>&1 | FileCheck -allow-empty %s + +// Try missing umbrella header argument. 
+; RUN: not clang-installapi --target=arm64-apple-macosx13 \ +; RUN: -install_name /System/Library/Frameworks/Umbrella.framework/Versions/A/Umbrella \ +; RUN: -ObjC -F%t/Frameworks/ %t/inputs.json \ +; RUN: --public-umbrella-header=Ignore.h \ +; RUN: -o %t/output.tbd 2>&1 | FileCheck %s -check-prefix=ERR + +; ERR: error: public umbrella header file not found in input: 'Ignore.h' + +; CHECK-NOT: error +; CHECK-NOT: warning + +;--- Frameworks/Umbrella.framework/Headers/Ignore.h +#error "This header should be ignored" + +;--- inputs.json.in +{ + "headers": [ { + "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/AAA.h", + "type" : "public" + }, + { + "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/SpecialUmbrella.h", + "type" : "public" + }, + { + "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/AAA_Private.h", + "type" : "private" + }, + { + "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h", + "type" : "private" + }], + "version": "3" +} diff --git a/clang/test/Modules/codegen.test b/clang/test/Modules/codegen.test index 7760205..0af630a 100644 --- a/clang/test/Modules/codegen.test +++ b/clang/test/Modules/codegen.test @@ -26,7 +26,7 @@ USE: $_Z4instIiEvv = comdat any USE: $_Z10always_inlv = comdat any FOO: $_ZN13implicit_dtorD2Ev = comdat any FOO: define weak_odr void @_Z2f1PKcz(ptr noundef %fmt, ...) #{{[0-9]+}} comdat -FOO: call void @llvm.va_start(ptr %{{[a-zA-Z0-9]*}}) +FOO: call void @llvm.va_start.p0(ptr %{{[a-zA-Z0-9]*}}) Test that implicit special members are emitted into the FOO module if they're ODR used there, otherwise emit them linkonce_odr as usual in the use. 
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 9f8a8bd..85762b7 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -196,7 +196,7 @@ // CHECK-8_6-NOT: __ARM_FEATURE_SHA3 1 // CHECK-8_6-NOT: __ARM_FEATURE_SM4 1 -// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s +// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve+f32mm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s // CHECK-SVE-8_6: __ARM_FEATURE_SVE 1 // CHECK-SVE-8_6: __ARM_FEATURE_SVE_BF16 1 // CHECK-SVE-8_6: __ARM_FEATURE_SVE_MATMUL_FP32 1 diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index b7ea0d0..787cc80 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++20 -verify %s +// RUN: %clang_cc1 -std=c++20 -ferror-limit 0 -verify %s namespace PR47043 { template<typename T> concept True = true; @@ -1114,3 +1114,11 @@ void foo() { } } // namespace GH64808 + +namespace GH86757_1 { +template <typename...> concept b = false; +template <typename> concept c = b<>; +template <typename d> concept f = c< d >; +template <f> struct e; // expected-note {{}} +template <f d> struct e<d>; // expected-error {{class template partial specialization is not more specialized than the primary template}} +} diff --git a/clang/tools/clang-installapi/InstallAPIOpts.td b/clang/tools/clang-installapi/InstallAPIOpts.td index ab9e1fe..71532c9 100644 --- a/clang/tools/clang-installapi/InstallAPIOpts.td +++ b/clang/tools/clang-installapi/InstallAPIOpts.td @@ -61,3 +61,15 @@ def exclude_private_header : Separate<["-"], "exclude-private-header">, HelpText<"Exclude private header from parsing">; def exclude_private_header_EQ : Joined<["--"], "exclude-private-header=">, 
Alias<exclude_private_header>; +def public_umbrella_header : Separate<["-"], "public-umbrella-header">, + MetaVarName<"<path>">, HelpText<"Specify the public umbrella header location">; +def public_umbrella_header_EQ : Joined<["--"], "public-umbrella-header=">, + Alias<public_umbrella_header>; +def private_umbrella_header : Separate<["-"], "private-umbrella-header">, + MetaVarName<"<path>">, HelpText<"Specify the private umbrella header location">; +def private_umbrella_header_EQ : Joined<["--"], "private-umbrella-header=">, + Alias<private_umbrella_header>; +def project_umbrella_header : Separate<["-"], "project-umbrella-header">, + MetaVarName<"<path>">, HelpText<"Specify the project umbrella header location">; +def project_umbrella_header_EQ : Joined<["--"], "project-umbrella-header=">, + Alias<project_umbrella_header>; diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 4f79c62..8e4a1b0 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -270,6 +270,16 @@ Options::processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args) { OPT_exclude_project_header)) return {}; + // Handle umbrella headers. + if (const Arg *A = ParsedArgs.getLastArg(OPT_public_umbrella_header)) + DriverOpts.PublicUmbrellaHeader = A->getValue(); + + if (const Arg *A = ParsedArgs.getLastArg(OPT_private_umbrella_header)) + DriverOpts.PrivateUmbrellaHeader = A->getValue(); + + if (const Arg *A = ParsedArgs.getLastArg(OPT_project_umbrella_header)) + DriverOpts.ProjectUmbrellaHeader = A->getValue(); + /// Any unclaimed arguments should be forwarded to the clang driver. 
std::vector<const char *> ClangDriverArgs(ParsedArgs.size()); for (const Arg *A : ParsedArgs) { @@ -323,6 +333,15 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM, } } +static const Regex Rule("(.+)/(.+)\\.framework/"); +static StringRef getFrameworkNameFromInstallName(StringRef InstallName) { + SmallVector<StringRef, 3> Match; + Rule.match(InstallName, &Match); + if (Match.empty()) + return ""; + return Match.back(); +} + InstallAPIContext Options::createContext() { InstallAPIContext Ctx; Ctx.FM = FM; @@ -339,6 +358,11 @@ InstallAPIContext Options::createContext() { Ctx.OutputLoc = DriverOpts.OutputPath; Ctx.LangMode = FEOpts.LangMode; + // Attempt to find umbrella headers by capturing framework name. + StringRef FrameworkName; + if (!LinkerOpts.IsDylib) + FrameworkName = getFrameworkNameFromInstallName(LinkerOpts.InstallName); + // Process inputs. for (const std::string &ListPath : DriverOpts.FileLists) { auto Buffer = FM->getBufferForFile(ListPath); @@ -357,8 +381,7 @@ InstallAPIContext Options::createContext() { assert(Type != HeaderType::Unknown && "Missing header type."); for (const StringRef Path : Headers) { if (!FM->getOptionalFileRef(Path)) { - Diags->Report(diag::err_no_such_header_file) - << Path << (unsigned)Type - 1; + Diags->Report(diag::err_no_such_header_file) << Path << (unsigned)Type; return false; } SmallString<PATH_MAX> FullPath(Path); @@ -382,6 +405,7 @@ InstallAPIContext Options::createContext() { std::vector<std::unique_ptr<HeaderGlob>> ExcludedHeaderGlobs; std::set<FileEntryRef> ExcludedHeaderFiles; auto ParseGlobs = [&](const PathSeq &Paths, HeaderType Type) { + assert(Type != HeaderType::Unknown && "Missing header type."); for (const StringRef Path : Paths) { auto Glob = HeaderGlob::create(Path, Type); if (Glob) @@ -424,6 +448,57 @@ InstallAPIContext Options::createContext() { if (!Glob->didMatch()) Diags->Report(diag::warn_glob_did_not_match) << Glob->str(); + // Mark any explicit or inferred umbrella headers. 
If one exists, move + // that to the beginning of the input headers. + auto MarkandMoveUmbrellaInHeaders = [&](llvm::Regex &Regex, + HeaderType Type) -> bool { + auto It = find_if(Ctx.InputHeaders, [&Regex, Type](const HeaderFile &H) { + return (H.getType() == Type) && Regex.match(H.getPath()); + }); + + if (It == Ctx.InputHeaders.end()) + return false; + It->setUmbrellaHeader(); + + // Because there can be an umbrella header per header type, + // find the first non umbrella header to swap position with. + auto BeginPos = find_if(Ctx.InputHeaders, [](const HeaderFile &H) { + return !H.isUmbrellaHeader(); + }); + if (BeginPos != Ctx.InputHeaders.end() && BeginPos < It) + std::swap(*BeginPos, *It); + return true; + }; + + auto FindUmbrellaHeader = [&](StringRef HeaderPath, HeaderType Type) -> bool { + assert(Type != HeaderType::Unknown && "Missing header type."); + if (!HeaderPath.empty()) { + auto EscapedString = Regex::escape(HeaderPath); + Regex UmbrellaRegex(EscapedString); + if (!MarkandMoveUmbrellaInHeaders(UmbrellaRegex, Type)) { + Diags->Report(diag::err_no_such_umbrella_header_file) + << HeaderPath << (unsigned)Type; + return false; + } + } else if (!FrameworkName.empty() && (Type != HeaderType::Project)) { + auto UmbrellaName = "/" + Regex::escape(FrameworkName); + if (Type == HeaderType::Public) + UmbrellaName += "\\.h"; + else + UmbrellaName += "[_]?Private\\.h"; + Regex UmbrellaRegex(UmbrellaName); + MarkandMoveUmbrellaInHeaders(UmbrellaRegex, Type); + } + return true; + }; + if (!FindUmbrellaHeader(DriverOpts.PublicUmbrellaHeader, + HeaderType::Public) || + !FindUmbrellaHeader(DriverOpts.PrivateUmbrellaHeader, + HeaderType::Private) || + !FindUmbrellaHeader(DriverOpts.ProjectUmbrellaHeader, + HeaderType::Project)) + return Ctx; + // Parse binary dylib and initialize verifier. 
if (DriverOpts.DylibToVerify.empty()) { Ctx.Verifier = std::make_unique<DylibVerifier>(); diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h index c18309f..3671e4c 100644 --- a/clang/tools/clang-installapi/Options.h +++ b/clang/tools/clang-installapi/Options.h @@ -31,6 +31,15 @@ struct DriverOptions { /// \brief Path to input file lists (JSON). llvm::MachO::PathSeq FileLists; + /// \brief Path to public umbrella header. + std::string PublicUmbrellaHeader; + + /// \brief Path to private umbrella header. + std::string PrivateUmbrellaHeader; + + /// \brief Path to project umbrella header. + std::string ProjectUmbrellaHeader; + /// \brief Paths of extra public headers. PathSeq ExtraPublicHeaders; diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index 292d524..991767d 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -680,6 +680,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(PreserveAll); TCALLINGCONV(M68kRTD); TCALLINGCONV(PreserveNone); + TCALLINGCONV(RISCVVectorCall); case CC_SpirFunction: return CXCallingConv_Unexposed; case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; case CC_OpenCLKernel: return CXCallingConv_Unexposed; diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index 8513174..5e41ef9 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -334,10 +334,6 @@ void RVVEmitter::createHeader(raw_ostream &OS) { OS << "#include <stdint.h>\n"; OS << "#include <stddef.h>\n\n"; - OS << "#ifndef __riscv_vector\n"; - OS << "#error \"Vector intrinsics require the vector extension.\"\n"; - OS << "#endif\n\n"; - OS << "#ifdef __cplusplus\n"; OS << "extern \"C\" {\n"; OS << "#endif\n\n"; diff --git a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp index e068c48..3e41f67 100644 
--- a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp @@ -141,7 +141,7 @@ TEST(ScudoStringsTest, CapacityIncreaseFails) { // Test requires that the default length is at least 6 characters. scudo::uptr MaxSize = Str.capacity(); - EXPECT_LE(6, MaxSize); + EXPECT_LE(6u, MaxSize); for (size_t i = 0; i < MaxSize - 5; i++) { Str.append("B"); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c0c603f..d31d6a5 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -51,7 +51,7 @@ public: Fortran::semantics::SemanticsContext &semaCtx, const Fortran::parser::OmpClauseList &clauses) : converter(converter), semaCtx(semaCtx), - clauses(makeList(clauses, semaCtx)) {} + clauses(makeClauses(clauses, semaCtx)) {} // 'Unique' clauses: They can appear at most once in the clause list. bool processCollapse( diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index f48e84f..853dcd7 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -347,7 +347,7 @@ Aligned make(const parser::OmpClause::Aligned &inp, return Aligned{{ /*Alignment=*/maybeApply(makeExprFn(semaCtx), t1), - /*List=*/makeList(t0, semaCtx), + /*List=*/makeObjects(t0, semaCtx), }}; } @@ -362,7 +362,7 @@ Allocate make(const parser::OmpClause::Allocate &inp, return Allocate{{/*AllocatorSimpleModifier=*/std::nullopt, /*AllocatorComplexModifier=*/std::nullopt, /*AlignModifier=*/std::nullopt, - /*List=*/makeList(t1, semaCtx)}}; + /*List=*/makeObjects(t1, semaCtx)}}; } using Tuple = decltype(Allocate::t); @@ -374,7 +374,7 @@ Allocate make(const parser::OmpClause::Allocate &inp, return {/*AllocatorSimpleModifier=*/makeExpr(v.v, semaCtx), /*AllocatorComplexModifier=*/std::nullopt, /*AlignModifier=*/std::nullopt, - /*List=*/makeList(t1, semaCtx)}; + /*List=*/makeObjects(t1, semaCtx)}; }, // 
complex-modifier + align-modifier [&](const wrapped::AllocateModifier::ComplexModifier &v) -> Tuple { @@ -384,14 +384,14 @@ Allocate make(const parser::OmpClause::Allocate &inp, /*AllocatorSimpleModifier=*/std::nullopt, /*AllocatorComplexModifier=*/Allocator{makeExpr(s0.v, semaCtx)}, /*AlignModifier=*/Align{makeExpr(s1.v, semaCtx)}, - /*List=*/makeList(t1, semaCtx)}; + /*List=*/makeObjects(t1, semaCtx)}; }, // align-modifier [&](const wrapped::AllocateModifier::Align &v) -> Tuple { return {/*AllocatorSimpleModifier=*/std::nullopt, /*AllocatorComplexModifier=*/std::nullopt, /*AlignModifier=*/Align{makeExpr(v.v, semaCtx)}, - /*List=*/makeList(t1, semaCtx)}; + /*List=*/makeObjects(t1, semaCtx)}; }, }, t0->u)}; @@ -450,13 +450,13 @@ Collapse make(const parser::OmpClause::Collapse &inp, Copyin make(const parser::OmpClause::Copyin &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Copyin{/*List=*/makeList(inp.v, semaCtx)}; + return Copyin{/*List=*/makeObjects(inp.v, semaCtx)}; } Copyprivate make(const parser::OmpClause::Copyprivate &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Copyprivate{/*List=*/makeList(inp.v, semaCtx)}; + return Copyprivate{/*List=*/makeObjects(inp.v, semaCtx)}; } Default make(const parser::OmpClause::Default &inp, @@ -641,7 +641,7 @@ Doacross make(const parser::OmpClause::Doacross &inp, Enter make(const parser::OmpClause::Enter &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Enter{makeList(/*List=*/inp.v, semaCtx)}; + return Enter{makeObjects(/*List=*/inp.v, semaCtx)}; } Exclusive make(const parser::OmpClause::Exclusive &inp, @@ -671,7 +671,7 @@ Final make(const parser::OmpClause::Final &inp, Firstprivate make(const parser::OmpClause::Firstprivate &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Firstprivate{/*List=*/makeList(inp.v, semaCtx)}; + return 
Firstprivate{/*List=*/makeObjects(inp.v, semaCtx)}; } // Flush: empty @@ -681,7 +681,7 @@ From make(const parser::OmpClause::From &inp, // inp.v -> parser::OmpObjectList return From{{/*Expectation=*/std::nullopt, /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, - /*LocatorList=*/makeList(inp.v, semaCtx)}}; + /*LocatorList=*/makeObjects(inp.v, semaCtx)}}; } // Full: empty @@ -696,7 +696,7 @@ Grainsize make(const parser::OmpClause::Grainsize &inp, HasDeviceAddr make(const parser::OmpClause::HasDeviceAddr &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return HasDeviceAddr{/*List=*/makeList(inp.v, semaCtx)}; + return HasDeviceAddr{/*List=*/makeObjects(inp.v, semaCtx)}; } Hint make(const parser::OmpClause::Hint &inp, @@ -762,20 +762,20 @@ InReduction make(const parser::OmpClause::InReduction &inp, auto &t1 = std::get<parser::OmpObjectList>(inp.v.t); return InReduction{ {/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)}, - /*List=*/makeList(t1, semaCtx)}}; + /*List=*/makeObjects(t1, semaCtx)}}; } IsDevicePtr make(const parser::OmpClause::IsDevicePtr &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return IsDevicePtr{/*List=*/makeList(inp.v, semaCtx)}; + return IsDevicePtr{/*List=*/makeObjects(inp.v, semaCtx)}; } Lastprivate make(const parser::OmpClause::Lastprivate &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList return Lastprivate{{/*LastprivateModifier=*/std::nullopt, - /*List=*/makeList(inp.v, semaCtx)}}; + /*List=*/makeObjects(inp.v, semaCtx)}}; } Linear make(const parser::OmpClause::Linear &inp, @@ -817,7 +817,7 @@ Linear make(const parser::OmpClause::Linear &inp, Link make(const parser::OmpClause::Link &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Link{/*List=*/makeList(inp.v, semaCtx)}; + return Link{/*List=*/makeObjects(inp.v, semaCtx)}; } Map make(const parser::OmpClause::Map &inp, @@ -844,7 +844,7 @@ Map 
make(const parser::OmpClause::Map &inp, if (!t0) { return Map{{/*MapType=*/std::nullopt, /*MapTypeModifiers=*/std::nullopt, /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, - /*LocatorList=*/makeList(t1, semaCtx)}}; + /*LocatorList=*/makeObjects(t1, semaCtx)}}; } auto &s0 = std::get<std::optional<parser::OmpMapType::Always>>(t0->t); @@ -857,7 +857,7 @@ Map make(const parser::OmpClause::Map &inp, return Map{{/*MapType=*/convert1(s1), /*MapTypeModifiers=*/maybeList, /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, - /*LocatorList=*/makeList(t1, semaCtx)}}; + /*LocatorList=*/makeObjects(t1, semaCtx)}}; } // Match: incomplete @@ -980,7 +980,7 @@ Priority make(const parser::OmpClause::Priority &inp, Private make(const parser::OmpClause::Private &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Private{/*List=*/makeList(inp.v, semaCtx)}; + return Private{/*List=*/makeObjects(inp.v, semaCtx)}; } ProcBind make(const parser::OmpClause::ProcBind &inp, @@ -1010,7 +1010,7 @@ Reduction make(const parser::OmpClause::Reduction &inp, return Reduction{ {/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)}, /*ReductionModifier=*/std::nullopt, - /*List=*/makeList(t1, semaCtx)}}; + /*List=*/makeObjects(t1, semaCtx)}}; } // Relaxed: empty @@ -1104,7 +1104,7 @@ Severity make(const parser::OmpClause::Severity &inp, Shared make(const parser::OmpClause::Shared &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return Shared{/*List=*/makeList(inp.v, semaCtx)}; + return Shared{/*List=*/makeObjects(inp.v, semaCtx)}; } // Simd: empty @@ -1128,7 +1128,7 @@ TaskReduction make(const parser::OmpClause::TaskReduction &inp, auto &t1 = std::get<parser::OmpObjectList>(inp.v.t); return TaskReduction{ {/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)}, - /*List=*/makeList(t1, semaCtx)}}; + /*List=*/makeObjects(t1, semaCtx)}}; } ThreadLimit make(const parser::OmpClause::ThreadLimit &inp, @@ -1145,7 +1145,7 @@ 
To make(const parser::OmpClause::To &inp, // inp.v -> parser::OmpObjectList return To{{/*Expectation=*/std::nullopt, /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, - /*LocatorList=*/makeList(inp.v, semaCtx)}}; + /*LocatorList=*/makeObjects(inp.v, semaCtx)}}; } // UnifiedAddress: empty @@ -1175,13 +1175,13 @@ Use make(const parser::OmpClause::Use &inp, UseDeviceAddr make(const parser::OmpClause::UseDeviceAddr &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return UseDeviceAddr{/*List=*/makeList(inp.v, semaCtx)}; + return UseDeviceAddr{/*List=*/makeObjects(inp.v, semaCtx)}; } UseDevicePtr make(const parser::OmpClause::UseDevicePtr &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpObjectList - return UseDevicePtr{/*List=*/makeList(inp.v, semaCtx)}; + return UseDevicePtr{/*List=*/makeObjects(inp.v, semaCtx)}; } UsesAllocators make(const parser::OmpClause::UsesAllocators &inp, @@ -1205,8 +1205,8 @@ Clause makeClause(const Fortran::parser::OmpClause &cls, cls.u); } -List<Clause> makeList(const parser::OmpClauseList &clauses, - semantics::SemanticsContext &semaCtx) { +List<Clause> makeClauses(const parser::OmpClauseList &clauses, + semantics::SemanticsContext &semaCtx) { return makeList(clauses.v, [&](const parser::OmpClause &s) { return makeClause(s, semaCtx); }); diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index af13182..3e77642 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -88,8 +88,8 @@ List<ResultTy> makeList(ContainerTy &&container, FunctionTy &&func) { return v; } -inline ObjectList makeList(const parser::OmpObjectList &objects, - semantics::SemanticsContext &semaCtx) { +inline ObjectList makeObjects(const parser::OmpObjectList &objects, + semantics::SemanticsContext &semaCtx) { return makeList(objects.v, makeObjectFn(semaCtx)); } @@ -256,8 +256,8 @@ Clause makeClause(llvm::omp::Clause id, Specific &&specific, Clause 
makeClause(const Fortran::parser::OmpClause &cls, semantics::SemanticsContext &semaCtx); -List<Clause> makeList(const parser::OmpClauseList &clauses, - semantics::SemanticsContext &semaCtx); +List<Clause> makeClauses(const parser::OmpClauseList &clauses, + semantics::SemanticsContext &semaCtx); } // namespace Fortran::lower::omp #endif // FORTRAN_LOWER_OPENMP_CLAUSES_H diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 226abe9..1cbc825 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -89,7 +89,7 @@ public: Fortran::lower::SymMap *symTable = nullptr) : hasLastPrivateOp(false), converter(converter), firOpBuilder(converter.getFirOpBuilder()), - clauses(omp::makeList(opClauseList, semaCtx)), eval(eval), + clauses(omp::makeClauses(opClauseList, semaCtx)), eval(eval), useDelayedPrivatization(useDelayedPrivatization), symTable(symTable) {} // Privatisation is split into two steps. 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0cf2a8f9..5defffd 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1254,7 +1254,7 @@ static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( if (const auto *objectList{ Fortran::parser::Unwrap<Fortran::parser::OmpObjectList>(spec.u)}) { - ObjectList objects{makeList(*objectList, semaCtx)}; + ObjectList objects{makeObjects(*objectList, semaCtx)}; // Case: declare target(func, var1, var2) gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, symbolAndClause); @@ -2352,7 +2352,7 @@ void Fortran::lower::genOpenMPReduction( const Fortran::parser::OmpClauseList &clauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - List<Clause> clauses{makeList(clauseList, semaCtx)}; + List<Clause> clauses{makeClauses(clauseList, semaCtx)}; for (const Clause &clause : clauses) { if (const auto &reductionClause = diff --git a/libc/docs/dev/code_style.rst b/libc/docs/dev/code_style.rst index e6fc6df..22a18b7 100644 --- a/libc/docs/dev/code_style.rst +++ b/libc/docs/dev/code_style.rst @@ -55,7 +55,7 @@ We define two kinds of macros: * ``src/__support/macros/config.h`` - Important compiler and platform features. Such macros can be used to produce portable code by parameterizing compilation based on the presence or lack of a given - feature. e.g., ``LIBC_HAS_BUILTIN`` + feature. e.g., ``LIBC_HAS_FEATURE`` * ``src/__support/macros/attributes.h`` - Attributes for functions, types, and variables. 
e.g., ``LIBC_UNUSED`` * ``src/__support/macros/optimization.h`` - Portable macros for performance diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index f76285b..84d01fe 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -18,7 +18,6 @@ add_header_library( .limits .type_traits libc.src.__support.macros.attributes - libc.src.__support.macros.config libc.src.__support.macros.sanitizer ) @@ -157,7 +156,6 @@ add_header_library( DEPENDS libc.include.llvm-libc-macros.stdfix_macros libc.src.__support.macros.attributes - libc.src.__support.macros.config libc.src.__support.macros.properties.types ) diff --git a/libc/src/__support/CPP/atomic.h b/libc/src/__support/CPP/atomic.h index b74cb598..5e42894 100644 --- a/libc/src/__support/CPP/atomic.h +++ b/libc/src/__support/CPP/atomic.h @@ -71,10 +71,11 @@ public: T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_load_n)) - return __scoped_atomic_load_n(&val, int(mem_ord), (int)(mem_scope)); - else - return __atomic_load_n(&val, int(mem_ord)); +#if __has_builtin(__scoped_atomic_load_n) + return __scoped_atomic_load_n(&val, int(mem_ord), (int)(mem_scope)); +#else + return __atomic_load_n(&val, int(mem_ord)); +#endif } // Atomic store. 
@@ -85,10 +86,11 @@ public: void store(T rhs, MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_store_n)) - __scoped_atomic_store_n(&val, rhs, int(mem_ord), (int)(mem_scope)); - else - __atomic_store_n(&val, rhs, int(mem_ord)); +#if __has_builtin(__scoped_atomic_store_n) + __scoped_atomic_store_n(&val, rhs, int(mem_ord), (int)(mem_scope)); +#else + __atomic_store_n(&val, rhs, int(mem_ord)); +#endif } // Atomic compare exchange @@ -101,47 +103,51 @@ public: T exchange(T desired, MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_exchange_n)) - return __scoped_atomic_exchange_n(&val, desired, int(mem_ord), - (int)(mem_scope)); - else - return __atomic_exchange_n(&val, desired, int(mem_ord)); +#if __has_builtin(__scoped_atomic_exchange_n) + return __scoped_atomic_exchange_n(&val, desired, int(mem_ord), + (int)(mem_scope)); +#else + return __atomic_exchange_n(&val, desired, int(mem_ord)); +#endif } T fetch_add(T increment, MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_add)) - return __scoped_atomic_fetch_add(&val, increment, int(mem_ord), - (int)(mem_scope)); - else - return __atomic_fetch_add(&val, increment, int(mem_ord)); +#if __has_builtin(__scoped_atomic_fetch_add) + return __scoped_atomic_fetch_add(&val, increment, int(mem_ord), + (int)(mem_scope)); +#else + return __atomic_fetch_add(&val, increment, int(mem_ord)); +#endif } T fetch_or(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_or)) - return __scoped_atomic_fetch_or(&val, mask, int(mem_ord), - (int)(mem_scope)); - else - return __atomic_fetch_or(&val, mask, 
int(mem_ord)); +#if __has_builtin(__scoped_atomic_fetch_or) + return __scoped_atomic_fetch_or(&val, mask, int(mem_ord), (int)(mem_scope)); +#else + return __atomic_fetch_or(&val, mask, int(mem_ord)); +#endif } T fetch_and(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_and)) - return __scoped_atomic_fetch_and(&val, mask, int(mem_ord), - (int)(mem_scope)); - else - return __atomic_fetch_and(&val, mask, int(mem_ord)); +#if __has_builtin(__scoped_atomic_fetch_and) + return __scoped_atomic_fetch_and(&val, mask, int(mem_ord), + (int)(mem_scope)); +#else + return __atomic_fetch_and(&val, mask, int(mem_ord)); +#endif } T fetch_sub(T decrement, MemoryOrder mem_ord = MemoryOrder::SEQ_CST, [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) { - if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_sub)) - return __scoped_atomic_fetch_sub(&val, decrement, int(mem_ord), - (int)(mem_scope)); - else - return __atomic_fetch_sub(&val, decrement, int(mem_ord)); +#if __has_builtin(__scoped_atomic_fetch_sub) + return __scoped_atomic_fetch_sub(&val, decrement, int(mem_ord), + (int)(mem_scope)); +#else + return __atomic_fetch_sub(&val, decrement, int(mem_ord)); +#endif } // Set the value without using an atomic operation. This is useful @@ -166,7 +172,7 @@ LIBC_INLINE void atomic_thread_fence([[maybe_unused]] MemoryOrder mem_ord) { // except no instructions for memory ordering are issued. Only reordering of // the instructions by the compiler is suppressed as order instructs. LIBC_INLINE void atomic_signal_fence([[maybe_unused]] MemoryOrder mem_ord) { -#if LIBC_HAS_BUILTIN(__atomic_signal_fence) +#if __has_builtin(__atomic_signal_fence) __atomic_signal_fence(static_cast<int>(mem_ord)); #else // if the builtin is not ready, use asm as a full compiler barrier. 
diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 3f2fbec..80f50fd 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -14,14 +14,13 @@ #include "src/__support/CPP/limits.h" // numeric_limits #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN #include "src/__support/macros/sanitizer.h" #include <stdint.h> namespace LIBC_NAMESPACE::cpp { -#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline) +#if __has_builtin(__builtin_memcpy_inline) #define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE #endif @@ -36,20 +35,20 @@ LIBC_INLINE constexpr cpp::enable_if_t< To> bit_cast(const From &from) { MSAN_UNPOISON(&from, sizeof(From)); -#if LIBC_HAS_BUILTIN(__builtin_bit_cast) +#if __has_builtin(__builtin_bit_cast) return __builtin_bit_cast(To, from); #else To to; char *dst = reinterpret_cast<char *>(&to); const char *src = reinterpret_cast<const char *>(&from); -#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline) +#if __has_builtin(__builtin_memcpy_inline) __builtin_memcpy_inline(dst, src, sizeof(To)); #else for (unsigned i = 0; i < sizeof(To); ++i) dst[i] = src[i]; -#endif // LIBC_HAS_BUILTIN(__builtin_memcpy_inline) +#endif // __has_builtin(__builtin_memcpy_inline) return to; -#endif // LIBC_HAS_BUILTIN(__builtin_bit_cast) +#endif // __has_builtin(__builtin_bit_cast) } template <typename T> @@ -94,7 +93,7 @@ countr_zero(T value) { } return zero_bits; } -#if LIBC_HAS_BUILTIN(__builtin_ctzs) +#if __has_builtin(__builtin_ctzs) ADD_SPECIALIZATION(countr_zero, unsigned short, __builtin_ctzs) #endif ADD_SPECIALIZATION(countr_zero, unsigned int, __builtin_ctz) @@ -124,7 +123,7 @@ countl_zero(T value) { } return zero_bits; } -#if LIBC_HAS_BUILTIN(__builtin_clzs) +#if __has_builtin(__builtin_clzs) ADD_SPECIALIZATION(countl_zero, unsigned short, __builtin_clzs) #endif ADD_SPECIALIZATION(countl_zero, unsigned int, __builtin_clz) @@ -242,6 +241,14 @@ 
LIBC_INLINE constexpr To bit_or_static_cast(const From &from) { /// Count number of 1's aka population count or Hamming weight. /// /// Only unsigned integral types are allowed. +// clang-19+, gcc-14+ +#if __has_builtin(__builtin_popcountg) +template <typename T> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int> +popcount(T value) { + return __builtin_popcountg(value); +} +#else // !__has_builtin(__builtin_popcountg) template <typename T> [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int> popcount(T value) { @@ -261,7 +268,7 @@ ADD_SPECIALIZATION(unsigned short, __builtin_popcount) ADD_SPECIALIZATION(unsigned, __builtin_popcount) ADD_SPECIALIZATION(unsigned long, __builtin_popcountl) ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) -// TODO: 128b specializations? +#endif // __builtin_popcountg #undef ADD_SPECIALIZATION } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/src/__support/CPP/type_traits/add_pointer.h b/libc/src/__support/CPP/type_traits/add_pointer.h index 72a764b..1257033 100644 --- a/libc/src/__support/CPP/type_traits/add_pointer.h +++ b/libc/src/__support/CPP/type_traits/add_pointer.h @@ -10,7 +10,6 @@ #include "src/__support/CPP/type_traits/remove_reference.h" #include "src/__support/CPP/type_traits/type_identity.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { diff --git a/libc/src/__support/CPP/type_traits/decay.h b/libc/src/__support/CPP/type_traits/decay.h index a018286..f1a1200 100644 --- a/libc/src/__support/CPP/type_traits/decay.h +++ b/libc/src/__support/CPP/type_traits/decay.h @@ -9,7 +9,6 @@ #define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_DECAY_H #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" #include "src/__support/CPP/type_traits/add_pointer.h" #include "src/__support/CPP/type_traits/conditional.h" diff --git a/libc/src/__support/CPP/type_traits/is_destructible.h 
b/libc/src/__support/CPP/type_traits/is_destructible.h index d47de1c..f94fe30 100644 --- a/libc/src/__support/CPP/type_traits/is_destructible.h +++ b/libc/src/__support/CPP/type_traits/is_destructible.h @@ -16,12 +16,11 @@ #include "src/__support/CPP/type_traits/true_type.h" #include "src/__support/CPP/type_traits/type_identity.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { // is_destructible -#if LIBC_HAS_BUILTIN(__is_destructible) +#if __has_builtin(__is_destructible) template <typename T> struct is_destructible : bool_constant<__is_destructible(T)> {}; #else diff --git a/libc/src/__support/CPP/type_traits/is_function.h b/libc/src/__support/CPP/type_traits/is_function.h index 557b322..0eba586 100644 --- a/libc/src/__support/CPP/type_traits/is_function.h +++ b/libc/src/__support/CPP/type_traits/is_function.h @@ -12,12 +12,11 @@ #include "src/__support/CPP/type_traits/is_const.h" #include "src/__support/CPP/type_traits/is_reference.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { // is_function -#if LIBC_HAS_BUILTIN(__is_function) +#if __has_builtin(__is_function) template <typename T> struct is_function : integral_constant<bool, __is_function(T)> {}; #else diff --git a/libc/src/__support/CPP/type_traits/is_lvalue_reference.h b/libc/src/__support/CPP/type_traits/is_lvalue_reference.h index f52e303..1dff57f 100644 --- a/libc/src/__support/CPP/type_traits/is_lvalue_reference.h +++ b/libc/src/__support/CPP/type_traits/is_lvalue_reference.h @@ -12,12 +12,11 @@ #include "src/__support/CPP/type_traits/false_type.h" #include "src/__support/CPP/type_traits/true_type.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { // is_lvalue_reference -#if LIBC_HAS_BUILTIN(__is_lvalue_reference) +#if __has_builtin(__is_lvalue_reference) template <typename T> struct 
is_lvalue_reference : bool_constant<__is_lvalue_reference(T)> {}; #else diff --git a/libc/src/__support/CPP/type_traits/is_reference.h b/libc/src/__support/CPP/type_traits/is_reference.h index c017028..bbfb2b7 100644 --- a/libc/src/__support/CPP/type_traits/is_reference.h +++ b/libc/src/__support/CPP/type_traits/is_reference.h @@ -12,12 +12,11 @@ #include "src/__support/CPP/type_traits/false_type.h" #include "src/__support/CPP/type_traits/true_type.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { // is_reference -#if LIBC_HAS_BUILTIN(__is_reference) +#if __has_builtin(__is_reference) template <typename T> struct is_reference : bool_constant<__is_reference(T)> {}; #else template <typename T> struct is_reference : public false_type {}; diff --git a/libc/src/__support/CPP/type_traits/is_rvalue_reference.h b/libc/src/__support/CPP/type_traits/is_rvalue_reference.h index f0487e4..3efbbe6 100644 --- a/libc/src/__support/CPP/type_traits/is_rvalue_reference.h +++ b/libc/src/__support/CPP/type_traits/is_rvalue_reference.h @@ -12,12 +12,11 @@ #include "src/__support/CPP/type_traits/false_type.h" #include "src/__support/CPP/type_traits/true_type.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { // is_rvalue_reference -#if LIBC_HAS_BUILTIN(__is_rvalue_reference) +#if __has_builtin(__is_rvalue_reference) template <typename T> struct is_rvalue_reference : bool_constant<__is_rvalue_reference(T)> {}; #else diff --git a/libc/src/__support/CPP/type_traits/is_trivially_copyable.h b/libc/src/__support/CPP/type_traits/is_trivially_copyable.h index 0c3fdcc..b4c825d 100644 --- a/libc/src/__support/CPP/type_traits/is_trivially_copyable.h +++ b/libc/src/__support/CPP/type_traits/is_trivially_copyable.h @@ -9,7 +9,6 @@ #define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H #include 
"src/__support/CPP/type_traits/integral_constant.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { diff --git a/libc/src/__support/CPP/type_traits/is_trivially_destructible.h b/libc/src/__support/CPP/type_traits/is_trivially_destructible.h index 3345149..37e0e86 100644 --- a/libc/src/__support/CPP/type_traits/is_trivially_destructible.h +++ b/libc/src/__support/CPP/type_traits/is_trivially_destructible.h @@ -11,12 +11,11 @@ #include "src/__support/CPP/type_traits/bool_constant.h" #include "src/__support/CPP/type_traits/is_destructible.h" #include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE::cpp { // is_trivially_destructible -#if LIBC_HAS_BUILTIN(__is_trivially_destructible) +#if __has_builtin(__is_trivially_destructible) template <typename T> struct is_trivially_destructible : public bool_constant<__is_trivially_destructible(T)> {}; @@ -25,7 +24,7 @@ template <typename T> struct is_trivially_destructible : public bool_constant<cpp::is_destructible_v<T> &&__has_trivial_destructor( T)> {}; -#endif // LIBC_HAS_BUILTIN(__is_trivially_destructible) +#endif // __has_builtin(__is_trivially_destructible) template <typename T> LIBC_INLINE_VAR constexpr bool is_trivially_destructible_v = is_trivially_destructible<T>::value; diff --git a/libc/src/__support/CPP/type_traits/remove_all_extents.h b/libc/src/__support/CPP/type_traits/remove_all_extents.h index bff6341..5941b82 100644 --- a/libc/src/__support/CPP/type_traits/remove_all_extents.h +++ b/libc/src/__support/CPP/type_traits/remove_all_extents.h @@ -9,14 +9,13 @@ #define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_REMOVE_ALL_EXTENTS_H #include "src/__support/CPP/type_traits/type_identity.h" -#include "src/__support/macros/config.h" #include <stddef.h> // size_t namespace LIBC_NAMESPACE::cpp { // remove_all_extents -#if LIBC_HAS_BUILTIN(__remove_all_extents) +#if __has_builtin(__remove_all_extents) template <typename T> using 
remove_all_extents_t = __remove_all_extents(T); template <typename T> struct remove_all_extents : cpp::type_identity<remove_all_extents_t<T>> {}; diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt index 0f43502..ff155a1 100644 --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -6,7 +6,6 @@ add_header_library( libc.include.fenv libc.include.math libc.src.__support.macros.attributes - libc.src.__support.macros.config libc.src.errno.errno ) diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index a6a533d..6086d5d 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -11,7 +11,6 @@ #include "include/llvm-libc-macros/math-macros.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN #include "src/__support/macros/properties/architectures.h" #include "src/errno/libc_errno.h" #include <fenv.h> diff --git a/libc/src/__support/FPUtil/gpu/FMA.h b/libc/src/__support/FPUtil/gpu/FMA.h index 86bc860..ef1cd26 100644 --- a/libc/src/__support/FPUtil/gpu/FMA.h +++ b/libc/src/__support/FPUtil/gpu/FMA.h @@ -10,12 +10,12 @@ #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GPU_FMA_H #include "src/__support/CPP/type_traits.h" -#include "src/__support/macros/config.h" -// These intrinsics map to the FMA instrunctions in the target ISA for the GPU. +// These intrinsics map to the FMA instructions in the target ISA for the GPU. // The default rounding mode generated from these will be to the nearest even. 
-static_assert(LIBC_HAS_BUILTIN(__builtin_fma), "FMA builtins must be defined"); -static_assert(LIBC_HAS_BUILTIN(__builtin_fmaf), "FMA builtins must be defined"); +#if !__has_builtin(__builtin_fma) || !__has_builtin(__builtin_fmaf) +#error "FMA builtins must be defined" +#endif namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index df01e08..282efdb 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -1082,6 +1082,17 @@ bit_cast(const UInt<Bits> &from) { return cpp::bit_cast<To>(from.val); } +// Specialization of cpp::popcount ('bit.h') for BigInt. +template <typename T> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int> +popcount(T value) { + int bits = 0; + for (auto word : value.val) + if (word) + bits += popcount(word); + return bits; +} + // Specialization of cpp::has_single_bit ('bit.h') for BigInt. template <typename T> [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, bool> @@ -1218,6 +1229,49 @@ LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, T> mask_leading_ones() { return out; } +// Specialization of count_zeros ('math_extras.h') for BigInt. +template <typename T> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int> +count_zeros(T value) { + return cpp::popcount(~value); +} + +// Specialization of first_leading_zero ('math_extras.h') for BigInt. +template <typename T> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int> +first_leading_zero(T value) { + return value == cpp::numeric_limits<T>::max() ? 0 + : cpp::countl_one(value) + 1; +} + +// Specialization of first_leading_one ('math_extras.h') for BigInt. +template <typename T> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int> +first_leading_one(T value) { + return first_leading_zero(~value); +} + +// Specialization of first_trailing_zero ('math_extras.h') for BigInt. 
+template <typename T> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int> +first_trailing_zero(T value) { + return value == cpp::numeric_limits<T>::max() ? 0 + : cpp::countr_zero(~value) + 1; +} + +// Specialization of first_trailing_one ('math_extras.h') for BigInt. +template <typename T> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int> +first_trailing_one(T value) { + return value == cpp::numeric_limits<T>::max() ? 0 + : cpp::countr_zero(value) + 1; +} + } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_UINT_H diff --git a/libc/src/__support/macros/config.h b/libc/src/__support/macros/config.h index 6666c13..3f200f0 100644 --- a/libc/src/__support/macros/config.h +++ b/libc/src/__support/macros/config.h @@ -13,24 +13,6 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_CONFIG_H #define LLVM_LIBC_SRC___SUPPORT_MACROS_CONFIG_H -// LIBC_HAS_BUILTIN() -// -// Checks whether the compiler supports a Clang Feature Checking Macro, and if -// so, checks whether it supports the provided builtin function "x" where x -// is one of the functions noted in -// https://clang.llvm.org/docs/LanguageExtensions.html -// -// Note: Use this macro to avoid an extra level of #ifdef __has_builtin check. -// http://releases.llvm.org/3.3/tools/clang/docs/LanguageExtensions.html - -// Compiler builtin-detection. -// clang.llvm.org/docs/LanguageExtensions.html#has-builtin -#ifdef __has_builtin -#define LIBC_HAS_BUILTIN(x) __has_builtin(x) -#else -#define LIBC_HAS_BUILTIN(x) 0 -#endif - // Compiler feature-detection. 
// clang.llvm.org/docs/LanguageExtensions.html#has-feature-and-has-extension #ifdef __has_feature diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h index ae97efc..59886ca 100644 --- a/libc/src/__support/macros/optimization.h +++ b/libc/src/__support/macros/optimization.h @@ -11,7 +11,6 @@ #define LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H #include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN #include "src/__support/macros/properties/compiler.h" // LIBC_COMPILER_IS_CLANG // We use a template to implement likely/unlikely to make sure that we don't diff --git a/libc/src/__support/macros/sanitizer.h b/libc/src/__support/macros/sanitizer.h index fc66c20..bd9b62b 100644 --- a/libc/src/__support/macros/sanitizer.h +++ b/libc/src/__support/macros/sanitizer.h @@ -47,8 +47,7 @@ // Functions to unpoison memory //----------------------------------------------------------------------------- -#if defined(LIBC_HAVE_MEMORY_SANITIZER) && \ - LIBC_HAS_BUILTIN(__builtin_constant_p) +#if defined(LIBC_HAVE_MEMORY_SANITIZER) && __has_builtin(__builtin_constant_p) // Only perform MSAN unpoison in non-constexpr context. 
#include <sanitizer/msan_interface.h> #define MSAN_UNPOISON(addr, size) \ diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index 28ee1be..70a8800 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -14,7 +14,6 @@ #include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits #include "src/__support/CPP/type_traits.h" // is_unsigned_v #include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN namespace LIBC_NAMESPACE { @@ -61,7 +60,7 @@ add_with_carry(T a, T b, T carry_in) { return add_with_carry_const<T>(a, b, carry_in); } -#if LIBC_HAS_BUILTIN(__builtin_addc) +#if __has_builtin(__builtin_addc) // https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins template <> @@ -129,7 +128,7 @@ add_with_carry<unsigned long long>(unsigned long long a, unsigned long long b, } } -#endif // LIBC_HAS_BUILTIN(__builtin_addc) +#endif // __has_builtin(__builtin_addc) // Subtract with borrow template <typename T> struct DiffBorrow { @@ -157,7 +156,7 @@ sub_with_borrow(T a, T b, T borrow_in) { return sub_with_borrow_const<T>(a, b, borrow_in); } -#if LIBC_HAS_BUILTIN(__builtin_subc) +#if __has_builtin(__builtin_subc) // https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins template <> @@ -225,7 +224,7 @@ sub_with_borrow<unsigned long long>(unsigned long long a, unsigned long long b, } } -#endif // LIBC_HAS_BUILTIN(__builtin_subc) +#endif // __has_builtin(__builtin_subc) template <typename T> [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int> diff --git a/libc/src/__support/memory_size.h b/libc/src/__support/memory_size.h index 7bd16a1..491123b 100644 --- a/libc/src/__support/memory_size.h +++ b/libc/src/__support/memory_size.h @@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE { namespace internal { template <class T> LIBC_INLINE bool mul_overflow(T a, T b, T *res) { 
-#if LIBC_HAS_BUILTIN(__builtin_mul_overflow) +#if __has_builtin(__builtin_mul_overflow) return __builtin_mul_overflow(a, b, res); #else T max = cpp::numeric_limits<T>::max(); diff --git a/libc/src/string/memory_utils/generic/builtin.h b/libc/src/string/memory_utils/generic/builtin.h index 5239329..ba4f4b8 100644 --- a/libc/src/string/memory_utils/generic/builtin.h +++ b/libc/src/string/memory_utils/generic/builtin.h @@ -10,16 +10,16 @@ #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BUILTIN_H #include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN #include "src/string/memory_utils/utils.h" // Ptr, CPtr #include <stddef.h> // size_t namespace LIBC_NAMESPACE { -static_assert(LIBC_HAS_BUILTIN(__builtin_memcpy), "Builtin not defined"); -static_assert(LIBC_HAS_BUILTIN(__builtin_memset), "Builtin not defined"); -static_assert(LIBC_HAS_BUILTIN(__builtin_memmove), "Builtin not defined"); +#if !__has_builtin(__builtin_memcpy) || !__has_builtin(__builtin_memset) || \ + !__has_builtin(__builtin_memmove) +#error "Builtin not defined" +#endif [[maybe_unused]] LIBC_INLINE void inline_memcpy_builtin(Ptr dst, CPtr src, size_t count, size_t offset = 0) { diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h index 79526d1..b3e1a26 100644 --- a/libc/src/string/memory_utils/utils.h +++ b/libc/src/string/memory_utils/utils.h @@ -14,7 +14,6 @@ #include "src/__support/CPP/type_traits.h" #include "src/__support/endian.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN #include "src/__support/macros/properties/architectures.h" #include <stddef.h> // size_t @@ -71,11 +70,11 @@ LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) { return sdiff >= 0 ? 
size <= udiff : size <= neg_udiff; } -#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline) +#if __has_builtin(__builtin_memcpy_inline) #define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE #endif -#if LIBC_HAS_BUILTIN(__builtin_memset_inline) +#if __has_builtin(__builtin_memset_inline) #define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE #endif diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index cee5b90..875b47e 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -15,13 +15,6 @@ namespace LIBC_NAMESPACE::cpp { -using UnsignedTypesNoBigInt = testing::TypeList< -#if defined(LIBC_TYPES_HAS_INT128) - __uint128_t, -#endif // LIBC_TYPES_HAS_INT128 - unsigned char, unsigned short, unsigned int, unsigned long, - unsigned long long>; - using UnsignedTypes = testing::TypeList< #if defined(LIBC_TYPES_HAS_INT128) __uint128_t, @@ -228,7 +221,7 @@ TEST(LlvmLibcBitTest, Rotr) { rotr<uint64_t>(0x12345678deadbeefULL, -19)); } -TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) { +TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) { EXPECT_EQ(popcount(T(0)), 0); for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i) EXPECT_EQ(popcount<T>(cpp::numeric_limits<T>::max() >> i), diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index e6422488..e88b3e1 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/UInt128.h" // UInt128 +#include "src/__support/UInt128.h" // UInt<128> #include "src/__support/integer_literals.h" #include "src/__support/math_extras.h" #include "test/UnitTest/Test.h" @@ -19,7 +19,7 @@ using UnsignedTypesNoBigInt = testing::TypeList< __uint128_t, #endif // LIBC_TYPES_HAS_INT128 unsigned char, unsigned short, unsigned int, unsigned long, - 
unsigned long long>; + unsigned long long, UInt<128>>; TEST(LlvmLibcBlockMathExtrasTest, mask_trailing_ones) { EXPECT_EQ(0_u8, (mask_leading_ones<uint8_t, 0>())); diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 90af156..46ad98f 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -6,6 +6,11 @@ // //===----------------------------------------------------------------------===// +// Workaround for missing __has_builtin in < GCC 10. +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + #include "llvmlibc_rpc_server.h" #include "src/__support/RPC/rpc.h" diff --git a/libcxx/include/__algorithm/copy.h b/libcxx/include/__algorithm/copy.h index 4c38154..0890b89 100644 --- a/libcxx/include/__algorithm/copy.h +++ b/libcxx/include/__algorithm/copy.h @@ -32,7 +32,7 @@ template <class, class _InIter, class _Sent, class _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter); template <class _AlgPolicy> -struct __copy_loop { +struct __copy_impl { template <class _InIter, class _Sent, class _OutIter> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -94,9 +94,7 @@ struct __copy_loop { __local_first = _Traits::__begin(++__segment_iterator); } } -}; -struct __copy_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -108,7 +106,7 @@ struct __copy_trivial { template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter> pair<_InIter, _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __copy(_InIter __first, _Sent __last, _OutIter __result) { - return std::__dispatch_copy_or_move<_AlgPolicy, __copy_loop<_AlgPolicy>, __copy_trivial>( + return std::__copy_move_unwrap_iters<__copy_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/libcxx/include/__algorithm/copy_backward.h b/libcxx/include/__algorithm/copy_backward.h index 591dd21..73dc846 100644 --- a/libcxx/include/__algorithm/copy_backward.h +++ b/libcxx/include/__algorithm/copy_backward.h @@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter> __copy_backward(_InIter __first, _Sent __last, _OutIter __result); template <class _AlgPolicy> -struct __copy_backward_loop { +struct __copy_backward_impl { template <class _InIter, class _Sent, class _OutIter> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -104,9 +104,7 @@ struct __copy_backward_loop { __local_last = _Traits::__end(__segment_iterator); } } -}; -struct __copy_backward_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -118,7 +116,7 @@ struct __copy_backward_trivial { template <class _AlgPolicy, class _BidirectionalIterator1, class _Sentinel, class _BidirectionalIterator2> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1, _BidirectionalIterator2> __copy_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result) { - return std::__dispatch_copy_or_move<_AlgPolicy, __copy_backward_loop<_AlgPolicy>, __copy_backward_trivial>( + return std::__copy_move_unwrap_iters<__copy_backward_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/libcxx/include/__algorithm/copy_move_common.h b/libcxx/include/__algorithm/copy_move_common.h index 845967b..12a26c6 100644 --- a/libcxx/include/__algorithm/copy_move_common.h +++ b/libcxx/include/__algorithm/copy_move_common.h @@ -81,30 +81,17 @@ __copy_backward_trivial_impl(_In* __first, _In* __last, _Out* __result) { // Iterator unwrapping and dispatching to the correct overload. -template <class _F1, class _F2> -struct __overload : _F1, _F2 { - using _F1::operator(); - using _F2::operator(); -}; - -template <class _InIter, class _Sent, class _OutIter, class = void> -struct __can_rewrap : false_type {}; - -template <class _InIter, class _Sent, class _OutIter> -struct __can_rewrap<_InIter, - _Sent, - _OutIter, - // Note that sentinels are always copy-constructible. 
- __enable_if_t< is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value > > - : true_type {}; +template <class _InIter, class _OutIter> +struct __can_rewrap + : integral_constant<bool, is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value> {}; template <class _Algorithm, class _InIter, class _Sent, class _OutIter, - __enable_if_t<__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0> + __enable_if_t<__can_rewrap<_InIter, _OutIter>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter> -__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) { +__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) { auto __range = std::__unwrap_range(__first, std::move(__last)); auto __result = _Algorithm()(std::move(__range.first), std::move(__range.second), std::__unwrap_iter(__out_first)); return std::make_pair(std::__rewrap_range<_Sent>(std::move(__first), std::move(__result.first)), @@ -115,24 +102,12 @@ template <class _Algorithm, class _InIter, class _Sent, class _OutIter, - __enable_if_t<!__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0> + __enable_if_t<!__can_rewrap<_InIter, _OutIter>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter> -__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) { +__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) { return _Algorithm()(std::move(__first), std::move(__last), std::move(__out_first)); } -template <class _AlgPolicy, - class _NaiveAlgorithm, - class _OptimizedAlgorithm, - class _InIter, - class _Sent, - class _OutIter> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter> -__dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) { - using _Algorithm = __overload<_NaiveAlgorithm, _OptimizedAlgorithm>; - return 
std::__unwrap_and_dispatch<_Algorithm>(std::move(__first), std::move(__last), std::move(__out_first)); -} - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/libcxx/include/__algorithm/move.h b/libcxx/include/__algorithm/move.h index bf574b5..1716d43 100644 --- a/libcxx/include/__algorithm/move.h +++ b/libcxx/include/__algorithm/move.h @@ -34,7 +34,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIte __move(_InIter __first, _Sent __last, _OutIter __result); template <class _AlgPolicy> -struct __move_loop { +struct __move_impl { template <class _InIter, class _Sent, class _OutIter> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -95,9 +95,7 @@ struct __move_loop { __local_first = _Traits::__begin(++__segment_iterator); } } -}; -struct __move_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
template <class _In, class _Out, __enable_if_t<__can_lower_move_assignment_to_memmove<_In, _Out>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -109,7 +107,7 @@ struct __move_trivial { template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __move(_InIter __first, _Sent __last, _OutIter __result) { - return std::__dispatch_copy_or_move<_AlgPolicy, __move_loop<_AlgPolicy>, __move_trivial>( + return std::__copy_move_unwrap_iters<__move_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/libcxx/include/__algorithm/move_backward.h b/libcxx/include/__algorithm/move_backward.h index 6bb7c91..4beb7bdb 100644 --- a/libcxx/include/__algorithm/move_backward.h +++ b/libcxx/include/__algorithm/move_backward.h @@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1 __move_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result); template <class _AlgPolicy> -struct __move_backward_loop { +struct __move_backward_impl { template <class _InIter, class _Sent, class _OutIter> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { @@ -104,9 +104,7 @@ struct __move_backward_loop { __local_last = _Traits::__end(--__segment_iterator); } } -}; -struct __move_backward_trivial { // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
template <class _In, class _Out, __enable_if_t<__can_lower_move_assignment_to_memmove<_In, _Out>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -122,7 +120,7 @@ __move_backward(_BidirectionalIterator1 __first, _Sentinel __last, _Bidirectiona std::is_copy_constructible<_BidirectionalIterator1>::value, "Iterators must be copy constructible."); - return std::__dispatch_copy_or_move<_AlgPolicy, __move_backward_loop<_AlgPolicy>, __move_backward_trivial>( + return std::__copy_move_unwrap_iters<__move_backward_impl<_AlgPolicy> >( std::move(__first), std::move(__last), std::move(__result)); } diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp index c06a9ed..cbca37e 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp @@ -329,7 +329,7 @@ constexpr bool test() { { [[maybe_unused]] std::same_as<unsigned long int> decltype(auto) _ = std::saturate_cast<unsigned long int>(sBigMax); } assert(std::saturate_cast<unsigned long int>( sBigMin) == 0UL); // saturated assert(std::saturate_cast<unsigned long int>( sZero) == 0UL); - assert(std::saturate_cast<unsigned long int>( sBigMax) == ULONG_MAX); // saturated + assert(std::saturate_cast<unsigned long int>( sBigMax) == (sizeof(UIntT) > sizeof(unsigned long int) ? 
ULONG_MAX : LONG_MAX)); // saturated depending on underlying types { [[maybe_unused]] std::same_as<unsigned long int> decltype(auto) _ = std::saturate_cast<unsigned long int>(uBigMax); } assert(std::saturate_cast<unsigned long int>( uZero) == 0UL); diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 8f85929..917f88f 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -54,6 +54,7 @@ enum class EmitKind { Obj, LLVM, ASM }; struct Export { StringRef name; // N in /export:N or /export:E=N StringRef extName; // E in /export:E=N + StringRef exportAs; // E in /export:N,EXPORTAS,E StringRef aliasTarget; // GNU specific: N in "alias == N" Symbol *sym = nullptr; uint16_t ordinal = 0; @@ -73,10 +74,9 @@ struct Export { StringRef exportName; // Name in DLL bool operator==(const Export &e) const { - return (name == e.name && extName == e.extName && - aliasTarget == e.aliasTarget && - ordinal == e.ordinal && noname == e.noname && - data == e.data && isPrivate == e.isPrivate); + return (name == e.name && extName == e.extName && exportAs == e.exportAs && + aliasTarget == e.aliasTarget && ordinal == e.ordinal && + noname == e.noname && data == e.data && isPrivate == e.isPrivate); } }; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 1814929..2b1d4ab 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -945,6 +945,7 @@ void LinkerDriver::createImportLibrary(bool asLib) { e2.Name = std::string(e1.name); e2.SymbolName = std::string(e1.symbolName); e2.ExtName = std::string(e1.extName); + e2.ExportAs = std::string(e1.exportAs); e2.AliasTarget = std::string(e1.aliasTarget); e2.Ordinal = e1.ordinal; e2.Noname = e1.noname; @@ -1044,6 +1045,7 @@ void LinkerDriver::parseModuleDefs(StringRef path) { e2.name = saver().save(e1.Name); e2.extName = saver().save(e1.ExtName); } + e2.exportAs = saver().save(e1.ExportAs); e2.aliasTarget = saver().save(e1.AliasTarget); e2.ordinal = e1.Ordinal; e2.noname = e1.Noname; diff --git a/lld/COFF/DriverUtils.cpp 
b/lld/COFF/DriverUtils.cpp index 0fa4769..b4ff31a 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -585,7 +585,8 @@ Export LinkerDriver::parseExport(StringRef arg) { } } - // Optional parameters "[,@ordinal[,NONAME]][,DATA][,PRIVATE]" + // Optional parameters + // "[,@ordinal[,NONAME]][,DATA][,PRIVATE][,EXPORTAS,exportname]" while (!rest.empty()) { StringRef tok; std::tie(tok, rest) = rest.split(","); @@ -607,6 +608,13 @@ Export LinkerDriver::parseExport(StringRef arg) { e.isPrivate = true; continue; } + if (tok.equals_insensitive("exportas")) { + if (!rest.empty() && !rest.contains(',')) + e.exportAs = rest; + else + error("invalid EXPORTAS value: " + rest); + break; + } if (tok.starts_with("@")) { int32_t ord; if (tok.substr(1).getAsInteger(0, ord)) @@ -683,7 +691,9 @@ void LinkerDriver::fixupExports() { } for (Export &e : ctx.config.exports) { - if (!e.forwardTo.empty()) { + if (!e.exportAs.empty()) { + e.exportName = e.exportAs; + } else if (!e.forwardTo.empty()) { e.exportName = undecorate(ctx, e.name); } else { e.exportName = undecorate(ctx, e.extName.empty() ? e.name : e.extName); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 33c5013..92f2e20 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1659,10 +1659,17 @@ static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) { // original section/value pairs. For non-GOT non-PLT relocation case below, we // may alter section/value, so create a copy of the symbol to make // section/value fixed. + // + // Prior to Android V, there was a bug that caused RELR relocations to be + // applied after packed relocations. This meant that resolvers referenced by + // IRELATIVE relocations in the packed relocation section would read + // unrelocated globals with RELR relocations when + // --pack-relative-relocs=android+relr is enabled. Work around this by placing + // IRELATIVE in .rela.plt. 
auto *directSym = makeDefined(cast<Defined>(sym)); directSym->allocateAux(); - addPltEntry(*in.iplt, *in.igotPlt, *mainPart->relaDyn, target->iRelativeRel, - *directSym); + auto &dyn = config->androidPackDynRelocs ? *in.relaPlt : *mainPart->relaDyn; + addPltEntry(*in.iplt, *in.igotPlt, dyn, target->iRelativeRel, *directSym); sym.allocateAux(); symAux.back().pltIdx = symAux[directSym->auxIdx].pltIdx; diff --git a/lld/test/COFF/exportas.test b/lld/test/COFF/exportas.test index c0295c3..d70547c 100644 --- a/lld/test/COFF/exportas.test +++ b/lld/test/COFF/exportas.test @@ -9,6 +9,77 @@ RUN: lld-link -out:out1.dll -dll -noentry test.obj test.lib RUN: llvm-readobj --coff-imports out1.dll | FileCheck --check-prefix=IMPORT %s IMPORT: Symbol: expfunc +Pass -export argument with EXPORTAS. + +RUN: llvm-mc -filetype=obj -triple=x86_64-windows func.s -o func.obj +RUN: lld-link -out:out2.dll -dll -noentry func.obj -export:func,EXPORTAS,expfunc +RUN: llvm-readobj --coff-exports out2.dll | FileCheck --check-prefix=EXPORT %s +EXPORT: Name: expfunc + +RUN: llvm-readobj out2.lib | FileCheck --check-prefix=IMPLIB %s +IMPLIB: Name type: export as +IMPLIB-NEXT: Export name: expfunc +IMPLIB-NEXT: Symbol: __imp_func +IMPLIB-NEXT: Symbol: func + +Use .drectve section with EXPORTAS. + +RUN: llvm-mc -filetype=obj -triple=x86_64-windows drectve.s -o drectve.obj +RUN: lld-link -out:out3.dll -dll -noentry func.obj drectve.obj +RUN: llvm-readobj --coff-exports out3.dll | FileCheck --check-prefix=EXPORT %s +RUN: llvm-readobj out3.lib | FileCheck --check-prefix=IMPLIB %s + +Use a .def file with EXPORTAS. + +RUN: lld-link -out:out4.dll -dll -noentry func.obj -def:test.def +RUN: llvm-readobj --coff-exports out4.dll | FileCheck --check-prefix=EXPORT %s +RUN: llvm-readobj out4.lib | FileCheck --check-prefix=IMPLIB %s + +Use a .def file with EXPORTAS in a forwarding export. 
+ +RUN: lld-link -out:out5.dll -dll -noentry func.obj -def:test2.def +RUN: llvm-readobj --coff-exports out5.dll | FileCheck --check-prefix=FORWARD-EXPORT %s +FORWARD-EXPORT: Export { +FORWARD-EXPORT-NEXT: Ordinal: 1 +FORWARD-EXPORT-NEXT: Name: expfunc +FORWARD-EXPORT-NEXT: ForwardedTo: otherdll.otherfunc +FORWARD-EXPORT-NEXT: } + +RUN: llvm-readobj out5.lib | FileCheck --check-prefix=FORWARD-IMPLIB %s +FORWARD-IMPLIB: Name type: export as +FORWARD-IMPLIB-NEXT: Export name: expfunc +FORWARD-IMPLIB-NEXT: Symbol: __imp_func +FORWARD-IMPLIB-NEXT: Symbol: func + +Pass -export argument with EXPORTAS in a forwarding export. + +RUN: lld-link -out:out6.dll -dll -noentry func.obj -export:func=otherdll.otherfunc,EXPORTAS,expfunc +RUN: llvm-readobj --coff-exports out6.dll | FileCheck --check-prefix=FORWARD-EXPORT %s +RUN: llvm-readobj out6.lib | FileCheck --check-prefix=FORWARD-IMPLIB %s + +Pass -export argument with EXPORTAS in a data export. + +RUN: lld-link -out:out7.dll -dll -noentry func.obj -export:func,DATA,@5,EXPORTAS,expfunc +RUN: llvm-readobj --coff-exports out7.dll | FileCheck --check-prefix=ORD %s +ORD: Ordinal: 5 +ORD-NEXT: Name: expfunc + +RUN: llvm-readobj out7.lib | FileCheck --check-prefix=ORD-IMPLIB %s +ORD-IMPLIB: Type: data +ORD-IMPLIB-NEXT: Name type: export as +ORD-IMPLIB-NEXT: Export name: expfunc +ORD-IMPLIB-NEXT: Symbol: __imp_func + +Check invalid EXPORTAS syntax. 
+ +RUN: not lld-link -out:err1.dll -dll -noentry func.obj -export:func,EXPORTAS, 2>&1 | \ +RUN: FileCheck --check-prefix=ERR1 %s +ERR1: error: invalid EXPORTAS value: {{$}} + +RUN: not lld-link -out:err2.dll -dll -noentry func.obj -export:func,EXPORTAS,expfunc,DATA 2>&1 | \ +RUN: FileCheck --check-prefix=ERR2 %s +ERR2: error: invalid EXPORTAS value: expfunc,DATA + #--- test.s .section ".test", "rd" .rva __imp_func @@ -17,3 +88,20 @@ IMPORT: Symbol: expfunc LIBRARY test.dll EXPORTS func EXPORTAS expfunc + +#--- test2.def +LIBRARY test.dll +EXPORTS + func=otherdll.otherfunc EXPORTAS expfunc + +#--- func.s + .text + .globl func + .p2align 2, 0x0 +func: + movl $1, %eax + retq + +#--- drectve.s + .section .drectve, "yn" + .ascii " -export:func,EXPORTAS,expfunc" diff --git a/lld/test/ELF/pack-dyn-relocs-ifunc.s b/lld/test/ELF/pack-dyn-relocs-ifunc.s new file mode 100644 index 0000000..6168d06 --- /dev/null +++ b/lld/test/ELF/pack-dyn-relocs-ifunc.s @@ -0,0 +1,49 @@ +# REQUIRES: aarch64 +## Prior to Android V, there was a bug that caused RELR relocations to be +## applied after packed relocations. This meant that resolvers referenced by +## IRELATIVE relocations in the packed relocation section would read unrelocated +## globals when --pack-relative-relocs=android+relr is enabled. Work around this +## by placing IRELATIVE in .rela.plt. 
+ +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-android a.s -o a.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-android b.s -o b.o +# RUN: ld.lld -shared b.o -o b.so +# RUN: ld.lld -pie --pack-dyn-relocs=android+relr -z separate-loadable-segments a.o b.so -o a +# RUN: llvm-readobj -r a | FileCheck %s +# RUN: llvm-objdump -d a | FileCheck %s --check-prefix=ASM + +# CHECK: .relr.dyn { +# CHECK-NEXT: 0x30000 R_AARCH64_RELATIVE - +# CHECK-NEXT: } +# CHECK: .rela.plt { +# CHECK-NEXT: 0x30020 R_AARCH64_JUMP_SLOT bar 0x0 +# CHECK-NEXT: 0x30028 R_AARCH64_IRELATIVE - 0x10000 +# CHECK-NEXT: } + +# ASM: <.iplt>: +# ASM-NEXT: adrp x16, 0x30000 +# ASM-NEXT: ldr x17, [x16, #0x28] +# ASM-NEXT: add x16, x16, #0x28 +# ASM-NEXT: br x17 + +#--- a.s +.text +.type foo, %gnu_indirect_function +.globl foo +foo: + ret + +.globl _start +_start: + bl foo + bl bar + +.data +.balign 8 +.quad .data + +#--- b.s +.globl bar +bar: + ret diff --git a/lldb/include/lldb/Symbol/UnwindTable.h b/lldb/include/lldb/Symbol/UnwindTable.h index f0ce704..26826e5 100644 --- a/lldb/include/lldb/Symbol/UnwindTable.h +++ b/lldb/include/lldb/Symbol/UnwindTable.h @@ -57,6 +57,10 @@ public: ArchSpec GetArchitecture(); + /// Called after a SymbolFile has been added to a Module to add any new + /// unwind sections that may now be available. 
+ void Update(); + private: void Dump(Stream &s); diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index a520523..9c105b3 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1009,6 +1009,8 @@ SymbolFile *Module::GetSymbolFile(bool can_create, Stream *feedback_strm) { m_symfile_up.reset( SymbolVendor::FindPlugin(shared_from_this(), feedback_strm)); m_did_load_symfile = true; + if (m_unwind_table) + m_unwind_table->Update(); } } } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 5f67658..1164bc6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -693,6 +693,7 @@ llvm::DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() { if (debug_abbrev_data.GetByteSize() == 0) return nullptr; + ElapsedTime elapsed(m_parse_time); auto abbr = std::make_unique<llvm::DWARFDebugAbbrev>(debug_abbrev_data.GetAsLLVM()); llvm::Error error = abbr->parse(); diff --git a/lldb/source/Symbol/UnwindTable.cpp b/lldb/source/Symbol/UnwindTable.cpp index 3c1a518..11bedf3 100644 --- a/lldb/source/Symbol/UnwindTable.cpp +++ b/lldb/source/Symbol/UnwindTable.cpp @@ -84,6 +84,51 @@ void UnwindTable::Initialize() { } } +void UnwindTable::Update() { + if (!m_initialized) + return Initialize(); + + std::lock_guard<std::mutex> guard(m_mutex); + + ObjectFile *object_file = m_module.GetObjectFile(); + if (!object_file) + return; + + if (!m_object_file_unwind_up) + m_object_file_unwind_up = object_file->CreateCallFrameInfo(); + + SectionList *sl = m_module.GetSectionList(); + if (!sl) + return; + + SectionSP sect = sl->FindSectionByType(eSectionTypeEHFrame, true); + if (!m_eh_frame_up && sect) { + m_eh_frame_up = std::make_unique<DWARFCallFrameInfo>( + *object_file, sect, DWARFCallFrameInfo::EH); + } + + sect = sl->FindSectionByType(eSectionTypeDWARFDebugFrame, true); + if (!m_debug_frame_up && 
sect) { + m_debug_frame_up = std::make_unique<DWARFCallFrameInfo>( + *object_file, sect, DWARFCallFrameInfo::DWARF); + } + + sect = sl->FindSectionByType(eSectionTypeCompactUnwind, true); + if (!m_compact_unwind_up && sect) { + m_compact_unwind_up = + std::make_unique<CompactUnwindInfo>(*object_file, sect); + } + + sect = sl->FindSectionByType(eSectionTypeARMexidx, true); + if (!m_arm_unwind_up && sect) { + SectionSP sect_extab = sl->FindSectionByType(eSectionTypeARMextab, true); + if (sect_extab.get()) { + m_arm_unwind_up = + std::make_unique<ArmUnwindInfo>(*object_file, sect, sect_extab); + } + } +} + UnwindTable::~UnwindTable() = default; std::optional<AddressRange> diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 3af62f5..03a74f2 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -1800,7 +1800,6 @@ void StackFrame::DumpUsingSettingsFormat(Stream *strm, bool show_unique, return; ExecutionContext exe_ctx(shared_from_this()); - StreamString s; const FormatEntity::Entry *frame_format = nullptr; Target *target = exe_ctx.GetTargetPtr(); diff --git a/lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test b/lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test new file mode 100644 index 0000000..5420213 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test @@ -0,0 +1,27 @@ +# TODO: When it's possible to run "image show-unwind" without a running +# process, we can remove the unsupported line below, and hard-code an ELF +# triple in the test. 
+# UNSUPPORTED: system-windows, system-darwin + +# RUN: cd %T +# RUN: %clang_host %S/Inputs/target-symbols-add-unwind.c -g \ +# RUN: -fno-unwind-tables -fno-asynchronous-unwind-tables \ +# RUN: -o target-symbols-add-unwind.debug +# RUN: llvm-objcopy --strip-debug target-symbols-add-unwind.debug \ +# RUN: target-symbols-add-unwind.stripped +# RUN: %lldb target-symbols-add-unwind.stripped -s %s -o quit | FileCheck %s + +process launch --stop-at-entry +image show-unwind -n main +# CHECK-LABEL: image show-unwind -n main +# CHECK-NOT: debug_frame UnwindPlan: + +target symbols add -s target-symbols-add-unwind.stripped target-symbols-add-unwind.debug +# CHECK-LABEL: target symbols add +# CHECK: symbol file {{.*}} has been added to {{.*}} + +image show-unwind -n main +# CHECK-LABEL: image show-unwind -n main +# CHECK: debug_frame UnwindPlan: +# CHECK-NEXT: This UnwindPlan originally sourced from DWARF CFI +# CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes. diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 030b769..a4be315 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12815,10 +12815,11 @@ Variable argument support is defined in LLVM with the functions. These functions are related to the similarly named macros defined in the ``<stdarg.h>`` header file. -All of these functions operate on arguments that use a target-specific +All of these functions take as arguments pointers to a target-specific value type "``va_list``". The LLVM assembly language reference manual does not define what this type is, so all transformations should be -prepared to handle these functions regardless of the type used. +prepared to handle these functions regardless of the type used. The intrinsics +are overloaded, and can be used for pointers to different address spaces. This example shows how the :ref:`va_arg <i_va_arg>` instruction and the variable argument handling intrinsic functions are used. 
@@ -12835,24 +12836,24 @@ variable argument handling intrinsic functions are used. define i32 @test(i32 %X, ...) { ; Initialize variable argument processing %ap = alloca %struct.va_list - call void @llvm.va_start(ptr %ap) + call void @llvm.va_start.p0(ptr %ap) ; Read a single integer argument %tmp = va_arg ptr %ap, i32 ; Demonstrate usage of llvm.va_copy and llvm.va_end %aq = alloca ptr - call void @llvm.va_copy(ptr %aq, ptr %ap) - call void @llvm.va_end(ptr %aq) + call void @llvm.va_copy.p0(ptr %aq, ptr %ap) + call void @llvm.va_end.p0(ptr %aq) ; Stop processing of arguments. - call void @llvm.va_end(ptr %ap) + call void @llvm.va_end.p0(ptr %ap) ret i32 %tmp } - declare void @llvm.va_start(ptr) - declare void @llvm.va_copy(ptr, ptr) - declare void @llvm.va_end(ptr) + declare void @llvm.va_start.p0(ptr) + declare void @llvm.va_copy.p0(ptr, ptr) + declare void @llvm.va_end.p0(ptr) .. _int_va_start: @@ -12864,7 +12865,8 @@ Syntax: :: - declare void @llvm.va_start(ptr <arglist>) + declare void @llvm.va_start.p0(ptr <arglist>) + declare void @llvm.va_start.p5(ptr addrspace(5) <arglist>) Overview: """"""""" @@ -12896,7 +12898,8 @@ Syntax: :: - declare void @llvm.va_end(ptr <arglist>) + declare void @llvm.va_end.p0(ptr <arglist>) + declare void @llvm.va_end.p5(ptr addrspace(5) <arglist>) Overview: """"""""" @@ -12929,7 +12932,8 @@ Syntax: :: - declare void @llvm.va_copy(ptr <destarglist>, ptr <srcarglist>) + declare void @llvm.va_copy.p0(ptr <destarglist>, ptr <srcarglist>) + declare void @llvm.va_copy.p5(ptr addrspace(5) <destarglist>, ptr addrspace(5) <srcarglist>) Overview: """"""""" @@ -12942,6 +12946,7 @@ Arguments: The first argument is a pointer to a ``va_list`` element to initialize. The second argument is a pointer to a ``va_list`` element to copy from. +The address spaces of the two arguments must match. 
Semantics: """""""""" diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index c2b1a9d..7588048 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -76,6 +76,7 @@ Changes to the AMDGPU Backend Changes to the ARM Backend -------------------------- +* FEAT_F32MM is no longer activated by default when using `+sve` on v8.6-A or greater. The feature is still available and can be used by adding `+f32mm` to the command line options. Changes to the AVR Backend -------------------------- diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 5863a8d..65ccb1b 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -181,6 +181,7 @@ enum Kind { kw_tailcc, kw_m68k_rtdcc, kw_graalcc, + kw_riscv_vector_cc, // Attributes: kw_attributes, diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index e70b58d..d8927c6 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -1040,6 +1040,7 @@ HANDLE_DW_CC(0xca, LLVM_PreserveAll) HANDLE_DW_CC(0xcb, LLVM_X86RegCall) HANDLE_DW_CC(0xcc, LLVM_M68kRTD) HANDLE_DW_CC(0xcd, LLVM_PreserveNone) +HANDLE_DW_CC(0xce, LLVM_RISCVVectorCall) // From GCC source code (include/dwarf2.h): This DW_CC_ value is not currently // generated by any toolchain. It is used internally to GDB to indicate OpenCL // C functions that have been compiled with the IBM XL C for OpenCL compiler and diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index ef8aaf5..a05d1a4 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -264,6 +264,9 @@ namespace CallingConv { /// except that the first parameter is mapped to x9. ARM64EC_Thunk_Native = 109, + /// Calling convention used for RISC-V V-extension. + RISCV_VectorCall = 110, + /// The highest possible ID. Must be some 2^k - 1. 
MaxID = 1023 }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d0ef9c2..7649024 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -700,10 +700,13 @@ class MSBuiltin<string name> { //===--------------- Variable Argument Handling Intrinsics ----------------===// // -def int_vastart : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">; -def int_vacopy : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], - "llvm.va_copy">; -def int_vaend : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">; +def int_vastart : DefaultAttrsIntrinsic<[], + [llvm_anyptr_ty], [], "llvm.va_start">; +def int_vacopy : DefaultAttrsIntrinsic<[], + [llvm_anyptr_ty, LLVMMatchType<0>], [], + "llvm.va_copy">; +def int_vaend : DefaultAttrsIntrinsic<[], + [llvm_anyptr_ty], [], "llvm.va_end">; //===------------------- Garbage Collection Intrinsics --------------------===// // diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h index b25f8eb..b7db6e0 100644 --- a/llvm/include/llvm/IR/Verifier.h +++ b/llvm/include/llvm/IR/Verifier.h @@ -77,6 +77,7 @@ public: /// Visit an instruction and return true if it is valid, return false if an /// invalid TBAA is attached. bool visitTBAAMetadata(Instruction &I, const MDNode *MD); + bool visitTBAAStructMetadata(Instruction &I, const MDNode *MD); }; /// Check a function for errors, useful for use when debugging a diff --git a/llvm/include/llvm/Object/GOFF.h b/llvm/include/llvm/Object/GOFF.h index 9176245..9fb8876 100644 --- a/llvm/include/llvm/Object/GOFF.h +++ b/llvm/include/llvm/Object/GOFF.h @@ -73,6 +73,26 @@ protected: } }; +class TXTRecord : public Record { +public: + /// \brief Maximum length of data; any more must go in continuation. 
+ static const uint8_t TXTMaxDataLength = 56; + + static Error getData(const uint8_t *Record, SmallString<256> &CompleteData); + + static void getElementEsdId(const uint8_t *Record, uint32_t &EsdId) { + get<uint32_t>(Record, 4, EsdId); + } + + static void getOffset(const uint8_t *Record, uint32_t &Offset) { + get<uint32_t>(Record, 12, Offset); + } + + static void getDataLength(const uint8_t *Record, uint16_t &Length) { + get<uint16_t>(Record, 22, Length); + } +}; + class HDRRecord : public Record { public: static Error getData(const uint8_t *Record, SmallString<256> &CompleteData); diff --git a/llvm/include/llvm/Object/GOFFObjectFile.h b/llvm/include/llvm/Object/GOFFObjectFile.h index 7e1ceb9..6871641 100644 --- a/llvm/include/llvm/Object/GOFFObjectFile.h +++ b/llvm/include/llvm/Object/GOFFObjectFile.h @@ -29,7 +29,10 @@ namespace llvm { namespace object { class GOFFObjectFile : public ObjectFile { + friend class GOFFSymbolRef; + IndexedMap<const uint8_t *> EsdPtrs; // Indexed by EsdId. + SmallVector<const uint8_t *, 256> TextPtrs; mutable DenseMap<uint32_t, std::pair<size_t, std::unique_ptr<char[]>>> EsdNamesCache; @@ -38,7 +41,7 @@ class GOFFObjectFile : public ObjectFile { // (EDID, 0) code, r/o data section // (EDID,PRID) r/w data section SmallVector<SectionEntryImpl, 256> SectionList; - mutable DenseMap<uint32_t, std::string> SectionDataCache; + mutable DenseMap<uint32_t, SmallVector<uint8_t>> SectionDataCache; public: Expected<StringRef> getSymbolName(SymbolRef Symbol) const; @@ -66,6 +69,10 @@ public: return true; } + bool isSectionNoLoad(DataRefImpl Sec) const; + bool isSectionReadOnlyData(DataRefImpl Sec) const; + bool isSectionZeroInit(DataRefImpl Sec) const; + private: // SymbolRef. 
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override; @@ -75,27 +82,24 @@ private: Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override; Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override; Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override; + uint64_t getSymbolSize(DataRefImpl Symb) const; const uint8_t *getSymbolEsdRecord(DataRefImpl Symb) const; bool isSymbolUnresolved(DataRefImpl Symb) const; bool isSymbolIndirect(DataRefImpl Symb) const; // SectionRef. - void moveSectionNext(DataRefImpl &Sec) const override {} - virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override { - return StringRef(); - } - uint64_t getSectionAddress(DataRefImpl Sec) const override { return 0; } - uint64_t getSectionSize(DataRefImpl Sec) const override { return 0; } + void moveSectionNext(DataRefImpl &Sec) const override; + virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override; + uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionSize(DataRefImpl Sec) const override; virtual Expected<ArrayRef<uint8_t>> - getSectionContents(DataRefImpl Sec) const override { - return ArrayRef<uint8_t>(); - } - uint64_t getSectionIndex(DataRefImpl Sec) const override { return 0; } - uint64_t getSectionAlignment(DataRefImpl Sec) const override { return 0; } + getSectionContents(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override { return Sec.d.a; } + uint64_t getSectionAlignment(DataRefImpl Sec) const override; bool isSectionCompressed(DataRefImpl Sec) const override { return false; } - bool isSectionText(DataRefImpl Sec) const override { return false; } - bool isSectionData(DataRefImpl Sec) const override { return false; } + bool isSectionText(DataRefImpl Sec) const override; + bool isSectionData(DataRefImpl Sec) const override; bool isSectionBSS(DataRefImpl Sec) const override { return false; } bool 
isSectionVirtual(DataRefImpl Sec) const override { return false; } relocation_iterator section_rel_begin(DataRefImpl Sec) const override { @@ -109,6 +113,7 @@ private: const uint8_t *getSectionPrEsdRecord(DataRefImpl &Sec) const; const uint8_t *getSectionEdEsdRecord(uint32_t SectionIndex) const; const uint8_t *getSectionPrEsdRecord(uint32_t SectionIndex) const; + uint32_t getSectionDefEsdId(DataRefImpl &Sec) const; // RelocationRef. void moveRelocationNext(DataRefImpl &Rel) const override {} @@ -122,6 +127,29 @@ private: SmallVectorImpl<char> &Result) const override {} }; +class GOFFSymbolRef : public SymbolRef { +public: + GOFFSymbolRef(const SymbolRef &B) : SymbolRef(B) { + assert(isa<GOFFObjectFile>(SymbolRef::getObject())); + } + + const GOFFObjectFile *getObject() const { + return cast<GOFFObjectFile>(BasicSymbolRef::getObject()); + } + + Expected<uint32_t> getSymbolGOFFFlags() const { + return getObject()->getSymbolFlags(getRawDataRefImpl()); + } + + Expected<SymbolRef::Type> getSymbolGOFFType() const { + return getObject()->getSymbolType(getRawDataRefImpl()); + } + + uint64_t getSize() const { + return getObject()->getSymbolSize(getRawDataRefImpl()); + } +}; + } // namespace object } // namespace llvm diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 02f64fc..2301a27 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -640,6 +640,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(tailcc); KEYWORD(m68k_rtdcc); KEYWORD(graalcc); + KEYWORD(riscv_vector_cc); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index f0be021..41d48e5 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2143,6 +2143,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'tailcc' /// ::= 'm68k_rtdcc' /// ::= 'graalcc' +/// ::= 'riscv_vector_cc' /// ::= 'cc' UINT /// bool LLParser::parseOptionalCallingConv(unsigned 
&CC) { @@ -2213,6 +2214,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { case lltok::kw_tailcc: CC = CallingConv::Tail; break; case lltok::kw_m68k_rtdcc: CC = CallingConv::M68k_RTD; break; case lltok::kw_graalcc: CC = CallingConv::GRAAL; break; + case lltok::kw_riscv_vector_cc: + CC = CallingConv::RISCV_VectorCall; + break; case lltok::kw_cc: { Lex.Lex(); return parseUInt32(CC); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 39ee95d..36abe27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2555,7 +2555,8 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) { /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into /// a shift and add with a different constant. -static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { +static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Expecting add or sub"); @@ -2583,7 +2584,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { // Eliminate the 'not' by adjusting the shift and add/sub constant: // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) - SDLoc DL(N); if (SDValue NewC = DAG.FoldConstantArithmetic( IsAdd ? 
ISD::ADD : ISD::SUB, DL, VT, {ConstantOp, DAG.getConstant(1, DL, VT)})) { @@ -2878,7 +2878,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG)) return V; - if (SDValue V = foldAddSubOfSignBit(N, DAG)) + if (SDValue V = foldAddSubOfSignBit(N, DL, DAG)) return V; // Try to match AVGFLOOR fixedwidth pattern @@ -3877,14 +3877,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG)) return V; - if (SDValue V = foldAddSubOfSignBit(N, DAG)) + if (SDValue V = foldAddSubOfSignBit(N, DL, DAG)) return V; // Try to match AVGCEIL fixedwidth pattern if (SDValue V = foldSubToAvg(N, DL)) return V; - if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) + if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL)) return V; if (SDValue V = foldSubToUSubSat(VT, N, DL)) @@ -3949,7 +3949,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) if (C->getAPIntValue() == (BitWidth - 1)) - return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + return DAG.getNode(ISD::ABS, DL, VT, S0); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b1f6fd6..e10b8bc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -118,14 +118,7 @@ private: void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); - /// Some targets cannot handle a variable - /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it - /// is necessary to spill the vector being inserted into to memory, perform - /// the insert there, and then read the result back. 
- SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, - const SDLoc &dl); - SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, - const SDLoc &dl); + SDValue ExpandINSERT_VECTOR_ELT(SDValue Op); /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type @@ -378,45 +371,12 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { return Result; } -/// Some target cannot handle a variable insertion index for the -/// INSERT_VECTOR_ELT instruction. In this case, it -/// is necessary to spill the vector being inserted into to memory, perform -/// the insert there, and then read the result back. -SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, - SDValue Val, - SDValue Idx, - const SDLoc &dl) { - // If the target doesn't support this, we have to spill the input vector - // to a temporary stack slot, update the element, then reload it. This is - // badness. We could also load the value into a vector register (either - // with a "move to register" or "extload into register" instruction, then - // permute it into place, if the idx is a constant and if the idx is - // supported by the target. - EVT VT = Vec.getValueType(); - EVT EltVT = VT.getVectorElementType(); - SDValue StackPtr = DAG.CreateStackTemporary(VT); - - int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - - // Store the vector. - SDValue Ch = DAG.getStore( - DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); - - SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Idx); - - // Store the scalar value. - Ch = DAG.getTruncStore( - Ch, dl, Val, StackPtr2, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); - // Load the updated vector. 
- return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), SPFI)); -} +SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + SDLoc dl(Op); -SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, - SDValue Idx, - const SDLoc &dl) { if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for @@ -438,7 +398,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps); } } - return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); + return ExpandInsertToVectorThroughStack(Op); } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { @@ -1486,7 +1446,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Store the value to a temporary stack slot, then LOAD the returned part. EVT VecVT = Vec.getValueType(); - EVT SubVecVT = Part.getValueType(); + EVT PartVT = Part.getValueType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = @@ -1496,13 +1456,24 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. - SDValue SubStackPtr = - TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); + if (PartVT.isVector()) { + SDValue SubStackPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, PartVT, Idx); + + // Store the subvector. 
+ Ch = DAG.getStore( + Ch, dl, Part, SubStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + } else { + SDValue SubStackPtr = + TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - // Store the subvector. - Ch = DAG.getStore( - Ch, dl, Part, SubStackPtr, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + // Store the scalar value. + Ch = DAG.getTruncStore( + Ch, dl, Part, SubStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), + VecVT.getVectorElementType()); + } // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); @@ -3416,9 +3387,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); break; case ISD::INSERT_VECTOR_ELT: - Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), - Node->getOperand(1), - Node->getOperand(2), dl)); + Results.push_back(ExpandINSERT_VECTOR_ELT(SDValue(Node, 0))); break; case ISD::VECTOR_SHUFFLE: { SmallVector<int, 32> NewMask; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9990556..b16e78d 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2073,7 +2073,8 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { // FreeBSD has "__stack_chk_guard" defined externally on libc.so if (M.getDirectAccessExternalData() && !TM.getTargetTriple().isWindowsGNUEnvironment() && - !TM.getTargetTriple().isOSFreeBSD() && + !(TM.getTargetTriple().isPPC64() && + TM.getTargetTriple().isOSFreeBSD()) && (!TM.getTargetTriple().isOSDarwin() || TM.getRelocationModel() == Reloc::Static)) GV->setDSOLocal(true); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 38c191a..84690f0 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -363,6 +363,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case 
CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break; case CallingConv::M68k_RTD: Out << "m68k_rtdcc"; break; + case CallingConv::RISCV_VectorCall: + Out << "riscv_vector_cc"; + break; } } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 33f3584..e165725 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5096,6 +5096,9 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa)) TBAAVerifyHelper.visitTBAAMetadata(I, TBAA); + if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa_struct)) + TBAAVerifyHelper.visitTBAAStructMetadata(I, TBAA); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias)) visitAliasScopeListMetadata(MD); if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope)) @@ -7419,6 +7422,35 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { return true; } +bool TBAAVerifier::visitTBAAStructMetadata(Instruction &I, const MDNode *MD) { + CheckTBAA(MD->getNumOperands() % 3 == 0, + "tbaa.struct operands must occur in groups of three", &I, MD); + + // Each group of three operands must consist of two integers and a + // tbaa node. Moreover, the regions described by the offset and size + // operands must be non-overlapping. 
+ std::optional<APInt> NextFree; + for (unsigned int Idx = 0; Idx < MD->getNumOperands(); Idx += 3) { + auto *OffsetCI = + mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx)); + CheckTBAA(OffsetCI, "Offset must be a constant integer", &I, MD); + + auto *SizeCI = + mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx + 1)); + CheckTBAA(SizeCI, "Size must be a constant integer", &I, MD); + + MDNode *TBAA = dyn_cast_or_null<MDNode>(MD->getOperand(Idx + 2)); + CheckTBAA(TBAA, "TBAA tag missing", &I, MD); + visitTBAAMetadata(I, TBAA); + + bool NonOverlapping = !NextFree || NextFree->ule(OffsetCI->getValue()); + CheckTBAA(NonOverlapping, "Overlapping tbaa.struct regions", &I, MD); + + NextFree = OffsetCI->getValue() + SizeCI->getValue(); + } + return true; +} + char VerifierLegacyPass::ID = 0; INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false) diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp index 8224a14..477c5bf 100644 --- a/llvm/lib/Object/COFFImportFile.cpp +++ b/llvm/lib/Object/COFFImportFile.cpp @@ -690,12 +690,12 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path, if (ImportType == IMPORT_CODE && isArm64EC(M)) { if (std::optional<std::string> MangledName = getArm64ECMangledFunctionName(Name)) { - if (ExportName.empty()) { + if (!E.Noname && ExportName.empty()) { NameType = IMPORT_NAME_EXPORTAS; ExportName.swap(Name); } Name = std::move(*MangledName); - } else if (ExportName.empty()) { + } else if (!E.Noname && ExportName.empty()) { NameType = IMPORT_NAME_EXPORTAS; ExportName = std::move(*getArm64ECDemangledFunctionName(Name)); } diff --git a/llvm/lib/Object/GOFFObjectFile.cpp b/llvm/lib/Object/GOFFObjectFile.cpp index 76a1355..2845d93 100644 --- a/llvm/lib/Object/GOFFObjectFile.cpp +++ b/llvm/lib/Object/GOFFObjectFile.cpp @@ -168,6 +168,11 @@ GOFFObjectFile::GOFFObjectFile(MemoryBufferRef Object, Error &Err) LLVM_DEBUG(dbgs() << " -- ESD " << EsdId << "\n"); break; } + 
case GOFF::RT_TXT: + // Save TXT records. + TextPtrs.emplace_back(I); + LLVM_DEBUG(dbgs() << " -- TXT\n"); + break; case GOFF::RT_END: LLVM_DEBUG(dbgs() << " -- END (GOFF record type) unhandled\n"); break; @@ -364,6 +369,13 @@ GOFFObjectFile::getSymbolSection(DataRefImpl Symb) const { std::to_string(SymEdId)); } +uint64_t GOFFObjectFile::getSymbolSize(DataRefImpl Symb) const { + const uint8_t *Record = getSymbolEsdRecord(Symb); + uint32_t Length; + ESDRecord::getLength(Record, Length); + return Length; +} + const uint8_t *GOFFObjectFile::getSectionEdEsdRecord(DataRefImpl &Sec) const { SectionEntryImpl EsdIds = SectionList[Sec.d.a]; const uint8_t *EsdRecord = EsdPtrs[EsdIds.d.a]; @@ -394,6 +406,154 @@ GOFFObjectFile::getSectionPrEsdRecord(uint32_t SectionIndex) const { return EsdRecord; } +uint32_t GOFFObjectFile::getSectionDefEsdId(DataRefImpl &Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + uint32_t Length; + ESDRecord::getLength(EsdRecord, Length); + if (Length == 0) { + const uint8_t *PrEsdRecord = getSectionPrEsdRecord(Sec); + if (PrEsdRecord) + EsdRecord = PrEsdRecord; + } + + uint32_t DefEsdId; + ESDRecord::getEsdId(EsdRecord, DefEsdId); + LLVM_DEBUG(dbgs() << "Got def EsdId: " << DefEsdId << '\n'); + return DefEsdId; +} + +void GOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const { + Sec.d.a++; + if ((Sec.d.a) >= SectionList.size()) + Sec.d.a = 0; +} + +Expected<StringRef> GOFFObjectFile::getSectionName(DataRefImpl Sec) const { + DataRefImpl EdSym; + SectionEntryImpl EsdIds = SectionList[Sec.d.a]; + EdSym.d.a = EsdIds.d.a; + Expected<StringRef> Name = getSymbolName(EdSym); + if (Name) { + StringRef Res = *Name; + LLVM_DEBUG(dbgs() << "Got section: " << Res << '\n'); + LLVM_DEBUG(dbgs() << "Final section name: " << Res << '\n'); + Name = Res; + } + return Name; +} + +uint64_t GOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { + uint32_t Offset; + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + 
ESDRecord::getOffset(EsdRecord, Offset); + return Offset; +} + +uint64_t GOFFObjectFile::getSectionSize(DataRefImpl Sec) const { + uint32_t Length; + uint32_t DefEsdId = getSectionDefEsdId(Sec); + const uint8_t *EsdRecord = EsdPtrs[DefEsdId]; + ESDRecord::getLength(EsdRecord, Length); + LLVM_DEBUG(dbgs() << "Got section size: " << Length << '\n'); + return static_cast<uint64_t>(Length); +} + +// Unravel TXT records and expand fill characters to produce +// a contiguous sequence of bytes. +Expected<ArrayRef<uint8_t>> +GOFFObjectFile::getSectionContents(DataRefImpl Sec) const { + if (SectionDataCache.count(Sec.d.a)) { + auto &Buf = SectionDataCache[Sec.d.a]; + return ArrayRef<uint8_t>(Buf); + } + uint64_t SectionSize = getSectionSize(Sec); + uint32_t DefEsdId = getSectionDefEsdId(Sec); + + const uint8_t *EdEsdRecord = getSectionEdEsdRecord(Sec); + bool FillBytePresent; + ESDRecord::getFillBytePresent(EdEsdRecord, FillBytePresent); + uint8_t FillByte = '\0'; + if (FillBytePresent) + ESDRecord::getFillByteValue(EdEsdRecord, FillByte); + + // Initialize section with fill byte. + SmallVector<uint8_t> Data(SectionSize, FillByte); + + // Replace section with content from text records. 
+ for (const uint8_t *TxtRecordInt : TextPtrs) { + const uint8_t *TxtRecordPtr = TxtRecordInt; + uint32_t TxtEsdId; + TXTRecord::getElementEsdId(TxtRecordPtr, TxtEsdId); + LLVM_DEBUG(dbgs() << "Got txt EsdId: " << TxtEsdId << '\n'); + + if (TxtEsdId != DefEsdId) + continue; + + uint32_t TxtDataOffset; + TXTRecord::getOffset(TxtRecordPtr, TxtDataOffset); + + uint16_t TxtDataSize; + TXTRecord::getDataLength(TxtRecordPtr, TxtDataSize); + + LLVM_DEBUG(dbgs() << "Record offset " << TxtDataOffset << ", data size " + << TxtDataSize << "\n"); + + SmallString<256> CompleteData; + CompleteData.reserve(TxtDataSize); + if (Error Err = TXTRecord::getData(TxtRecordPtr, CompleteData)) + return std::move(Err); + assert(CompleteData.size() == TxtDataSize && "Wrong length of data"); + std::copy(CompleteData.data(), CompleteData.data() + TxtDataSize, + Data.begin() + TxtDataOffset); + } + SectionDataCache[Sec.d.a] = Data; + return ArrayRef<uint8_t>(Data); +} + +uint64_t GOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDAlignment Pow2Alignment; + ESDRecord::getAlignment(EsdRecord, Pow2Alignment); + return 1ULL << static_cast<uint64_t>(Pow2Alignment); +} + +bool GOFFObjectFile::isSectionText(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDExecutable Executable; + ESDRecord::getExecutable(EsdRecord, Executable); + return Executable == GOFF::ESD_EXE_CODE; +} + +bool GOFFObjectFile::isSectionData(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDExecutable Executable; + ESDRecord::getExecutable(EsdRecord, Executable); + return Executable == GOFF::ESD_EXE_DATA; +} + +bool GOFFObjectFile::isSectionNoLoad(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDLoadingBehavior LoadingBehavior; + ESDRecord::getLoadingBehavior(EsdRecord, LoadingBehavior); + return LoadingBehavior == 
GOFF::ESD_LB_NoLoad; +} + +bool GOFFObjectFile::isSectionReadOnlyData(DataRefImpl Sec) const { + if (!isSectionData(Sec)) + return false; + + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDLoadingBehavior LoadingBehavior; + ESDRecord::getLoadingBehavior(EsdRecord, LoadingBehavior); + return LoadingBehavior == GOFF::ESD_LB_Initial; +} + +bool GOFFObjectFile::isSectionZeroInit(DataRefImpl Sec) const { + // GOFF uses fill characters and fill characters are applied + // on getSectionContents() - so we say false to zero init. + return false; +} + section_iterator GOFFObjectFile::section_begin() const { DataRefImpl Sec; moveSectionNext(Sec); @@ -476,6 +636,13 @@ Error ESDRecord::getData(const uint8_t *Record, return getContinuousData(Record, DataSize, 72, CompleteData); } +Error TXTRecord::getData(const uint8_t *Record, + SmallString<256> &CompleteData) { + uint16_t Length; + getDataLength(Record, Length); + return getContinuousData(Record, Length, 24, CompleteData); +} + Error ENDRecord::getData(const uint8_t *Record, SmallString<256> &CompleteData) { uint16_t Length = getNameLength(Record); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 72e8b59..052b231 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -22,6 +22,7 @@ #include "AMDKernelCodeT.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600AsmPrinter.h" #include "SIMachineFunctionInfo.h" @@ -428,38 +429,43 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( return KernelCodeProperties; } -amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( - const MachineFunction &MF, - const SIProgramInfo &PI) const { +MCKernelDescriptor +AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF, + const SIProgramInfo 
&PI) const { const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const Function &F = MF.getFunction(); const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + MCContext &Ctx = MF.getContext(); - amdhsa::kernel_descriptor_t KernelDescriptor; - memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor)); + MCKernelDescriptor KernelDescriptor; assert(isUInt<32>(PI.ScratchSize)); assert(isUInt<32>(PI.getComputePGMRSrc1(STM))); assert(isUInt<32>(PI.getComputePGMRSrc2())); - KernelDescriptor.group_segment_fixed_size = PI.LDSSize; - KernelDescriptor.private_segment_fixed_size = PI.ScratchSize; + KernelDescriptor.group_segment_fixed_size = + MCConstantExpr::create(PI.LDSSize, Ctx); + KernelDescriptor.private_segment_fixed_size = + MCConstantExpr::create(PI.ScratchSize, Ctx); Align MaxKernArgAlign; - KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign); + KernelDescriptor.kernarg_size = MCConstantExpr::create( + STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx); - KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM); - KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(); - KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF); + KernelDescriptor.compute_pgm_rsrc1 = + MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx); + KernelDescriptor.compute_pgm_rsrc2 = + MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx); + KernelDescriptor.kernel_code_properties = + MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx); assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0); - if (STM.hasGFX90AInsts()) - KernelDescriptor.compute_pgm_rsrc3 = - CurrentProgramInfo.ComputePGMRSrc3GFX90A; + KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create( + STM.hasGFX90AInsts() ? 
CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx); - if (AMDGPU::hasKernargPreload(STM)) - KernelDescriptor.kernarg_preload = - static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs()); + KernelDescriptor.kernarg_preload = MCConstantExpr::create( + AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0, + Ctx); return KernelDescriptor; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 79326cd..b8b2718 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -28,15 +28,12 @@ class MCCodeEmitter; class MCOperand; namespace AMDGPU { +struct MCKernelDescriptor; namespace HSAMD { class MetadataStreamer; } } // namespace AMDGPU -namespace amdhsa { -struct kernel_descriptor_t; -} - class AMDGPUAsmPrinter final : public AsmPrinter { private: unsigned CodeObjectVersion; @@ -75,9 +72,9 @@ private: uint16_t getAmdhsaKernelCodeProperties( const MachineFunction &MF) const; - amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor( - const MachineFunction &MF, - const SIProgramInfo &PI) const; + AMDGPU::MCKernelDescriptor + getAmdhsaKernelDescriptor(const MachineFunction &MF, + const SIProgramInfo &PI) const; void initTargetStreamer(Module &M); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 84b4ccc..5aa35bec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -657,6 +657,8 @@ bool AMDGPULibCalls::fold(CallInst *CI) { return true; IRBuilder<> B(CI); + if (CI->isStrictFP()) + B.setIsFPConstrained(true); if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) { // Under unsafe-math, evaluate calls if possible. 
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 4648df1..294fc68 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,7 @@ #include "AMDKernelCodeT.h" #include "MCTargetDesc/AMDGPUMCExpr.h" +#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" @@ -5417,7 +5418,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (getParser().parseIdentifier(KernelName)) return true; - kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); + AMDGPU::MCKernelDescriptor KD = + AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( + &getSTI(), getContext()); StringSet<> Seen; @@ -5457,89 +5460,111 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { return TokError(".amdhsa_ directives cannot be repeated"); SMLoc ValStart = getLoc(); - int64_t IVal; - if (getParser().parseAbsoluteExpression(IVal)) + const MCExpr *ExprVal; + if (getParser().parseExpression(ExprVal)) return true; SMLoc ValEnd = getLoc(); SMRange ValRange = SMRange(ValStart, ValEnd); - if (IVal < 0) - return OutOfRangeError(ValRange); - + int64_t IVal = 0; uint64_t Val = IVal; + bool EvaluatableExpr; + if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) { + if (IVal < 0) + return OutOfRangeError(ValRange); + Val = IVal; + } #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ - if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ + if (!isUInt<ENTRY##_WIDTH>(Val)) \ return OutOfRangeError(RANGE); \ - AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); + AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ + getContext()); + +// Some fields use the parsed value immediately which requires the expression to +// be solvable. 
+#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ + if (!(RESOLVED)) \ + return Error(IDRange.Start, "directive should have resolvable expression", \ + IDRange); if (ID == ".amdhsa_group_segment_fixed_size") { - if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) + if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * + CHAR_BIT>(Val)) return OutOfRangeError(ValRange); - KD.group_segment_fixed_size = Val; + KD.group_segment_fixed_size = ExprVal; } else if (ID == ".amdhsa_private_segment_fixed_size") { - if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) + if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * + CHAR_BIT>(Val)) return OutOfRangeError(ValRange); - KD.private_segment_fixed_size = Val; + KD.private_segment_fixed_size = ExprVal; } else if (ID == ".amdhsa_kernarg_size") { - if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) + if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val)) return OutOfRangeError(ValRange); - KD.kernarg_size = Val; + KD.kernarg_size = ExprVal; } else if (ID == ".amdhsa_user_sgpr_count") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); ExplicitUserSGPRCount = Val; } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (hasArchitectedFlatScratch()) return Error(IDRange.Start, "directive is not supported with architected flat scratch", IDRange); PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, - Val, ValRange); + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 4; } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (!hasKernargPreload()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); if (Val > getMaxNumUserSGPRs()) return OutOfRangeError(ValRange); - PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val, + PARSE_BITS_ENTRY(KD.kernarg_preload, 
KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, ValRange); if (Val) { ImpliedUserSGPRCount += Val; PreloadLength = Val; } } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (!hasKernargPreload()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); if (Val >= 1024) return OutOfRangeError(ValRange); - PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val, + PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, ValRange); if (Val) PreloadOffset = Val; } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, - Val, ValRange); + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; @@ -5548,34 +5573,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { return Error(IDRange.Start, "directive is not supported with architected flat scratch", IDRange); + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, - ValRange); + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, - Val, ValRange); + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 1; } else if (ID == ".amdhsa_wavefront_size32") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); EnableWavefrontSize32 = Val; PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, - Val, ValRange); + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, + ValRange); } else if (ID == ".amdhsa_uses_dynamic_stack") { PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, + ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, "directive is not supported with architected flat scratch", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, + ValRange); } else if (ID == ".amdhsa_enable_private_segment") { if (!hasArchitectedFlatScratch()) return Error( @@ -5583,42 +5613,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { "directive is not supported without architected flat scratch", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, + ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 
Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal, ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, ValRange); } else if (ID == ".amdhsa_system_vgpr_workitem_id") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, ValRange); } else if (ID == ".amdhsa_next_free_vgpr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); VGPRRange = ValRange; NextFreeVGPR = Val; } else if (ID == ".amdhsa_next_free_sgpr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); SGPRRange = ValRange; NextFreeSGPR = Val; } else if (ID == ".amdhsa_accum_offset") { if (!isGFX90A()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); AccumOffset = Val; } else if (ID == ".amdhsa_reserve_vcc") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (!isUInt<1>(Val)) return OutOfRangeError(ValRange); ReserveVCC = Val; } else if (ID == ".amdhsa_reserve_flat_scratch") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (IVersion.Major < 7) return Error(IDRange.Start, "directive requires gfx7+", IDRange); if (hasArchitectedFlatScratch()) @@ -5638,97 +5674,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { IDRange); } else if (ID == ".amdhsa_float_round_mode_32") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); + 
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, + ValRange); } else if (ID == ".amdhsa_float_round_mode_16_64") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, + ValRange); } else if (ID == ".amdhsa_float_denorm_mode_32") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, + ValRange); } else if (ID == ".amdhsa_float_denorm_mode_16_64") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { if (IVersion.Major >= 12) return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, ValRange); } else if (ID == ".amdhsa_ieee_mode") { if (IVersion.Major >= 12) return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, ValRange); } else if (ID == ".amdhsa_fp16_overflow") { if (IVersion.Major < 9) return Error(IDRange.Start, "directive requires gfx9+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, ValRange); } else if (ID == ".amdhsa_tg_split") { if (!isGFX90A()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, - ValRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, + ExprVal, ValRange); } else if (ID == ".amdhsa_workgroup_processor_mode") { if 
(IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, ValRange); } else if (ID == ".amdhsa_memory_ordered") { if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, ValRange); } else if (ID == ".amdhsa_forward_progress") { if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, ValRange); } else if (ID == ".amdhsa_shared_vgpr_count") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (IVersion.Major < 10 || IVersion.Major >= 12) return Error(IDRange.Start, "directive requires gfx10 or gfx11", IDRange); SharedVGPRCount = Val; PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, - COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, - ValRange); + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_denorm_src") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, - ValRange); + 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_int_div_zero") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_round_robin_scheduling") { if (IVersion.Major < 12) return Error(IDRange.Start, "directive requires gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, + COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, ValRange); } else { return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); @@ -5755,15 +5799,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( VGPRBlocks)) return OutOfRangeError(VGPRRange); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); + AMDGPU::MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()), + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext()); if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( SGPRBlocks)) return OutOfRangeError(SGPRRange); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, - 
SGPRBlocks); + AMDGPU::MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()), + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext()); if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) return TokError("amdgpu_user_sgpr_count smaller than than implied by " @@ -5774,11 +5821,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) return TokError("too many user SGPRs enabled"); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, - UserSGPRCount); - - if (PreloadLength && KD.kernarg_size && - (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size)) + AMDGPU::MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()), + COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, + COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext()); + + int64_t IVal = 0; + if (!KD.kernarg_size->evaluateAsAbsolute(IVal)) + return TokError("Kernarg size should be resolvable"); + uint64_t kernarg_size = IVal; + if (PreloadLength && kernarg_size && + (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) return TokError("Kernarg preload length + offset is larger than the " "kernarg segment size"); @@ -5790,8 +5843,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { "increments of 4"); if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) return TokError("accum_offset exceeds total VGPR allocation"); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, - (AccumOffset / 4 - 1)); + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc3, + MCConstantExpr::create(AccumOffset / 4 - 1, getContext()), + COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, + COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext()); } if (IVersion.Major >= 10 && IVersion.Major < 12) { diff --git 
a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp new file mode 100644 index 0000000..77e7e30 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp @@ -0,0 +1,98 @@ +//===--- AMDHSAKernelDescriptor.h -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCKernelDescriptor.h" +#include "AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/TargetParser/TargetParser.h" + +using namespace llvm; +using namespace llvm::AMDGPU; + +MCKernelDescriptor +MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, + MCContext &Ctx) { + IsaVersion Version = getIsaVersion(STI->getCPU()); + + MCKernelDescriptor KD; + const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx); + const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx); + + KD.group_segment_fixed_size = ZeroMCExpr; + KD.private_segment_fixed_size = ZeroMCExpr; + KD.compute_pgm_rsrc1 = ZeroMCExpr; + KD.compute_pgm_rsrc2 = ZeroMCExpr; + KD.compute_pgm_rsrc3 = ZeroMCExpr; + KD.kernarg_size = ZeroMCExpr; + KD.kernel_code_properties = ZeroMCExpr; + KD.kernarg_preload = ZeroMCExpr; + + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, + MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx), + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx); + if (Version.Major < 12) { + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + 
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx); + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx); + } + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc2, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx); + if (Version.Major >= 10) { + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) + MCKernelDescriptor::bits_set( + KD.kernel_code_properties, OneMCExpr, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx); + if (!STI->getFeatureBits().test(FeatureCuMode)) + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx); + + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx); + } + if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit)) + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc3, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx); + return KD; +} + +void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value, + uint32_t Shift, uint32_t Mask, + MCContext &Ctx) { + auto Sft = MCConstantExpr::create(Shift, Ctx); + auto Msk = MCConstantExpr::create(Mask, Ctx); + Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx); + Dst = MCBinaryExpr::createOr(Dst, MCBinaryExpr::createShl(Value, Sft, Ctx), + Ctx); +} + +const MCExpr *MCKernelDescriptor::bits_get(const MCExpr *Src, uint32_t Shift, + uint32_t Mask, MCContext &Ctx) { + auto Sft = 
MCConstantExpr::create(Shift, Ctx); + auto Msk = MCConstantExpr::create(Mask, Ctx); + return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft, + Ctx); +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h new file mode 100644 index 0000000..26958ac --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h @@ -0,0 +1,54 @@ +//===--- AMDGPUMCKernelDescriptor.h ---------------------------*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDHSA kernel descriptor MCExpr struct for use in MC layer. Uses +/// AMDHSAKernelDescriptor.h for sizes and constants. +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H + +#include "llvm/Support/AMDHSAKernelDescriptor.h" + +namespace llvm { +class MCExpr; +class MCContext; +class MCSubtargetInfo; +namespace AMDGPU { + +struct MCKernelDescriptor { + const MCExpr *group_segment_fixed_size = nullptr; + const MCExpr *private_segment_fixed_size = nullptr; + const MCExpr *kernarg_size = nullptr; + const MCExpr *compute_pgm_rsrc3 = nullptr; + const MCExpr *compute_pgm_rsrc1 = nullptr; + const MCExpr *compute_pgm_rsrc2 = nullptr; + const MCExpr *kernel_code_properties = nullptr; + const MCExpr *kernarg_preload = nullptr; + + static MCKernelDescriptor + getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx); + // MCExpr for: + // Dst = Dst & ~Mask + // Dst = Dst | (Value << Shift) + static void bits_set(const MCExpr *&Dst, const 
MCExpr *Value, uint32_t Shift, + uint32_t Mask, MCContext &Ctx); + + // MCExpr for: + // return (Src & Mask) >> Shift + static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift, + uint32_t Mask, MCContext &Ctx); +}; + +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 4742b0b..3006fcd 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetStreamer.h" +#include "AMDGPUMCKernelDescriptor.h" #include "AMDGPUPTNote.h" #include "AMDKernelCodeT.h" #include "Utils/AMDGPUBaseInfo.h" @@ -307,94 +308,142 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, + const MCKernelDescriptor &KD, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { IsaVersion IVersion = getIsaVersion(STI.getCPU()); + const MCAsmInfo *MAI = getContext().getAsmInfo(); OS << "\t.amdhsa_kernel " << KernelName << '\n'; -#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \ - STREAM << "\t\t" << DIRECTIVE << " " \ - << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n'; - - OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size - << '\n'; - OS << "\t\t.amdhsa_private_segment_fixed_size " - << KD.private_segment_fixed_size << '\n'; - OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n'; - - PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD, - compute_pgm_rsrc2, - 
amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT); + auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask, + StringRef Directive) { + int64_t IVal; + OS << "\t\t" << Directive << ' '; + const MCExpr *pgm_rsrc1_bits = + MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext()); + if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) + OS << static_cast<uint64_t>(IVal); + else + pgm_rsrc1_bits->print(OS, MAI); + OS << '\n'; + }; + + OS << "\t\t.amdhsa_group_segment_fixed_size "; + KD.group_segment_fixed_size->print(OS, MAI); + OS << '\n'; + + OS << "\t\t.amdhsa_private_segment_fixed_size "; + KD.private_segment_fixed_size->print(OS, MAI); + OS << '\n'; + + OS << "\t\t.amdhsa_kernarg_size "; + KD.kernarg_size->print(OS, MAI); + OS << '\n'; + + PrintField( + KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count"); if (!hasArchitectedFlatScratch(STI)) - PRINT_FIELD( - OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PrintField( + KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, + ".amdhsa_user_sgpr_private_segment_buffer"); + PrintField(KD.kernel_code_properties, + 
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, + ".amdhsa_user_sgpr_dispatch_ptr"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, + ".amdhsa_user_sgpr_queue_ptr"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, + ".amdhsa_user_sgpr_kernarg_segment_ptr"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, + ".amdhsa_user_sgpr_dispatch_id"); if (!hasArchitectedFlatScratch(STI)) - PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, + ".amdhsa_user_sgpr_flat_scratch_init"); if (hasKernargPreload(STI)) { - PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD, - kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD, - kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET); + PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT, + amdhsa::KERNARG_PRELOAD_SPEC_LENGTH, + ".amdhsa_user_sgpr_kernarg_preload_length"); + PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT, + amdhsa::KERNARG_PRELOAD_SPEC_OFFSET, + ".amdhsa_user_sgpr_kernarg_preload_offset"); } - PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + PrintField( + KD.kernel_code_properties, + 
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, + ".amdhsa_user_sgpr_private_segment_size"); if (IVersion.Major >= 10) - PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + ".amdhsa_wavefront_size32"); if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) - PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); - PRINT_FIELD(OS, - (hasArchitectedFlatScratch(STI) - ? ".amdhsa_enable_private_segment" - : ".amdhsa_system_sgpr_private_segment_wavefront_offset"), - KD, compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); - PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, + ".amdhsa_uses_dynamic_stack"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, + (hasArchitectedFlatScratch(STI) + ? 
".amdhsa_enable_private_segment" + : ".amdhsa_system_sgpr_private_segment_wavefront_offset")); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, + ".amdhsa_system_sgpr_workgroup_id_x"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, + ".amdhsa_system_sgpr_workgroup_id_y"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, + ".amdhsa_system_sgpr_workgroup_id_z"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, + ".amdhsa_system_sgpr_workgroup_info"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, + ".amdhsa_system_vgpr_workitem_id"); // These directives are required. OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; - if (AMDGPU::isGFX90A(STI)) - OS << "\t\t.amdhsa_accum_offset " << - (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4 - << '\n'; + if (AMDGPU::isGFX90A(STI)) { + // MCExpr equivalent of taking the (accum_offset + 1) * 4. 
+ const MCExpr *accum_bits = MCKernelDescriptor::bits_get( + KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext()); + accum_bits = MCBinaryExpr::createAdd( + accum_bits, MCConstantExpr::create(1, getContext()), getContext()); + accum_bits = MCBinaryExpr::createMul( + accum_bits, MCConstantExpr::create(4, getContext()), getContext()); + OS << "\t\t.amdhsa_accum_offset "; + int64_t IVal; + if (accum_bits->evaluateAsAbsolute(IVal)) { + OS << static_cast<uint64_t>(IVal); + } else { + accum_bits->print(OS, MAI); + } + OS << '\n'; + } if (!ReserveVCC) OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; @@ -411,74 +460,105 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( break; } - PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); - PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); - PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); - PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, + ".amdhsa_float_round_mode_32"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, + ".amdhsa_float_round_mode_16_64"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, + ".amdhsa_float_denorm_mode_32"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, + 
".amdhsa_float_denorm_mode_16_64"); if (IVersion.Major < 12) { - PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); - PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, + ".amdhsa_dx10_clamp"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, + ".amdhsa_ieee_mode"); + } + if (IVersion.Major >= 9) { + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, + ".amdhsa_fp16_overflow"); } - if (IVersion.Major >= 9) - PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); if (AMDGPU::isGFX90A(STI)) - PRINT_FIELD(OS, ".amdhsa_tg_split", KD, - compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT); + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split"); if (IVersion.Major >= 10) { - PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE); - PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED); - PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, + ".amdhsa_workgroup_processor_mode"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, + 
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, + ".amdhsa_memory_ordered"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, + ".amdhsa_forward_progress"); } if (IVersion.Major >= 10 && IVersion.Major < 12) { - PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, + ".amdhsa_shared_vgpr_count"); } - if (IVersion.Major >= 12) - PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN); - PRINT_FIELD( - OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); - PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); - PRINT_FIELD( - OS, ".amdhsa_exception_fp_ieee_div_zero", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); - PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); - PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); - PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); - PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); -#undef PRINT_FIELD + if (IVersion.Major >= 12) { + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT, + 
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, + ".amdhsa_round_robin_scheduling"); + } + PrintField( + KD.compute_pgm_rsrc2, + amdhsa:: + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, + ".amdhsa_exception_fp_ieee_invalid_op"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, + ".amdhsa_exception_fp_denorm_src"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa:: + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, + ".amdhsa_exception_fp_ieee_div_zero"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, + ".amdhsa_exception_fp_ieee_overflow"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, + ".amdhsa_exception_fp_ieee_underflow"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, + ".amdhsa_exception_fp_ieee_inexact"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, + ".amdhsa_exception_int_div_zero"); OS << "\t.end_amdhsa_kernel\n"; } @@ -835,7 +915,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, + const MCKernelDescriptor 
&KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); @@ -853,7 +933,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( // Kernel descriptor symbol's type and size are fixed. KernelDescriptorSymbol->setType(ELF::STT_OBJECT); KernelDescriptorSymbol->setSize( - MCConstantExpr::create(sizeof(KernelDescriptor), Context)); + MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context)); // The visibility of the kernel code symbol must be protected or less to allow // static relocations from the kernel descriptor to be used. @@ -861,31 +941,43 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); Streamer.emitLabel(KernelDescriptorSymbol); - Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size); - Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size); - Streamer.emitInt32(KernelDescriptor.kernarg_size); - - for (uint8_t Res : KernelDescriptor.reserved0) - Streamer.emitInt8(Res); + Streamer.emitValue( + KernelDescriptor.group_segment_fixed_size, + sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size)); + Streamer.emitValue( + KernelDescriptor.private_segment_fixed_size, + sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size)); + Streamer.emitValue(KernelDescriptor.kernarg_size, + sizeof(amdhsa::kernel_descriptor_t::kernarg_size)); + + for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i) + Streamer.emitInt8(0u); // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The // expression being created is: // (start of kernel code) - (start of kernel descriptor) // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. 
- Streamer.emitValue(MCBinaryExpr::createSub( - MCSymbolRefExpr::create( - KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), - MCSymbolRefExpr::create( - KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), - Context), - sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); - for (uint8_t Res : KernelDescriptor.reserved1) - Streamer.emitInt8(Res); - Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3); - Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1); - Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2); - Streamer.emitInt16(KernelDescriptor.kernel_code_properties); - Streamer.emitInt16(KernelDescriptor.kernarg_preload); - for (uint8_t Res : KernelDescriptor.reserved3) - Streamer.emitInt8(Res); + Streamer.emitValue( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(KernelCodeSymbol, + MCSymbolRefExpr::VK_AMDGPU_REL64, Context), + MCSymbolRefExpr::create(KernelDescriptorSymbol, + MCSymbolRefExpr::VK_None, Context), + Context), + sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset)); + for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i) + Streamer.emitInt8(0u); + Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3, + sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3)); + Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1, + sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1)); + Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2, + sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2)); + Streamer.emitValue( + KernelDescriptor.kernel_code_properties, + sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties)); + Streamer.emitValue(KernelDescriptor.kernarg_preload, + sizeof(amdhsa::kernel_descriptor_t::kernarg_preload)); + for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i) + Streamer.emitInt8(0u); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 
5aa80ff..706897a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -22,15 +22,13 @@ class MCSymbol; class formatted_raw_ostream; namespace AMDGPU { + +struct MCKernelDescriptor; namespace HSAMD { struct Metadata; } } // namespace AMDGPU -namespace amdhsa { -struct kernel_descriptor_t; -} - class AMDGPUTargetStreamer : public MCTargetStreamer { AMDGPUPALMetadata PALMetadata; @@ -94,10 +92,11 @@ public: return true; } - virtual void EmitAmdhsaKernelDescriptor( - const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {} + virtual void + EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, + const AMDGPU::MCKernelDescriptor &KernelDescriptor, + uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr) {} static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); @@ -150,10 +149,11 @@ public: bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override; - void EmitAmdhsaKernelDescriptor( - const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + void + EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, + const AMDGPU::MCKernelDescriptor &KernelDescriptor, + uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -205,10 +205,11 @@ public: bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override; - void EmitAmdhsaKernelDescriptor( - const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - 
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + void + EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, + const AMDGPU::MCKernelDescriptor &KernelDescriptor, + uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr) override; }; } #endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index 0842a58..14a02b6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMAMDGPUDesc AMDGPUMCExpr.cpp AMDGPUMCTargetDesc.cpp AMDGPUTargetStreamer.cpp + AMDGPUMCKernelDescriptor.cpp R600InstPrinter.cpp R600MCCodeEmitter.cpp R600MCTargetDesc.cpp diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 7bb84d7..5d44396 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1221,47 +1221,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, } } -amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( - const MCSubtargetInfo *STI) { - IsaVersion Version = getIsaVersion(STI->getCPU()); - - amdhsa::kernel_descriptor_t KD; - memset(&KD, 0, sizeof(KD)); - - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, - amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE); - if (Version.Major >= 12) { - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0); - } else { - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1); - } - AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, - 
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); - if (Version.Major >= 10) { - AMDHSA_BITS_SET(KD.kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, - STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, - STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1); - } - if (AMDGPU::isGFX90A(*STI)) { - AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, - STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0); - } - return KD; -} - bool isGroupSegment(const GlobalValue *GV) { return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f4f9a78..943588f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -34,10 +34,6 @@ class StringRef; class Triple; class raw_ostream; -namespace amdhsa { -struct kernel_descriptor_t; -} - namespace AMDGPU { struct IsaVersion; @@ -855,9 +851,6 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI); -amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( - const MCSubtargetInfo *STI); - bool isGroupSegment(const GlobalValue *GV); bool isGlobalSegment(const GlobalValue *GV); bool isReadOnlySegment(const GlobalValue *GV); diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp index 5c113cc..e8d2cba 100644 --- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp @@ -958,6 +958,7 @@ bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB, unsigned NotImm = ~Imm & 0xffff; if (LastVPTReg != 
0 && LastVPTReg != VPR && LastVPTImm == Imm) { + MRI->clearKillFlags(LastVPTReg); Instr.getOperand(PIdx + 1).setReg(LastVPTReg); if (MRI->use_empty(VPR)) { DeadInstructions.insert(Copy); diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 0f4ece64..047c673 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -612,11 +612,11 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { static void findTemporariesForLR(const BitVector &GPRsNoLRSP, const BitVector &PopFriendly, - const LivePhysRegs &UsedRegs, unsigned &PopReg, + const LiveRegUnits &UsedRegs, unsigned &PopReg, unsigned &TmpReg, MachineRegisterInfo &MRI) { PopReg = TmpReg = 0; for (auto Reg : GPRsNoLRSP.set_bits()) { - if (UsedRegs.available(MRI, Reg)) { + if (UsedRegs.available(Reg)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Reg)) { PopReg = Reg; @@ -684,7 +684,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - LivePhysRegs UsedRegs(TRI); + LiveRegUnits UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function @@ -710,11 +710,6 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, unsigned TemporaryReg = 0; BitVector PopFriendly = TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); - // R7 may be used as a frame pointer, hence marked as not generally - // allocatable, however there's no reason to not use it as a temporary for - // restoring LR. 
- if (STI.getFramePointerReg() == ARM::R7) - PopFriendly.set(ARM::R7); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td index 11b716f..ad06f47 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -26,6 +26,19 @@ def CSR_ILP32D_LP64D : CalleeSavedRegs<(add CSR_ILP32_LP64, F8_D, F9_D, (sequence "F%u_D", 18, 27))>; +defvar CSR_V = (add (sequence "V%u", 1, 7), (sequence "V%u", 24, 31), + V2M2, V4M2, V6M2, V24M2, V26M2, V28M2, V30M2, + V4M4, V24M4, V28M4, V24M8); + +def CSR_ILP32_LP64_V + : CalleeSavedRegs<(add CSR_ILP32_LP64, CSR_V)>; + +def CSR_ILP32F_LP64F_V + : CalleeSavedRegs<(add CSR_ILP32F_LP64F, CSR_V)>; + +def CSR_ILP32D_LP64D_V + : CalleeSavedRegs<(add CSR_ILP32D_LP64D, CSR_V)>; + // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 39f2b3f..39075c81 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -388,6 +388,21 @@ getUnmanagedCSI(const MachineFunction &MF, return NonLibcallCSI; } +static SmallVector<CalleeSavedInfo, 8> +getRVVCalleeSavedInfo(const MachineFunction &MF, + const std::vector<CalleeSavedInfo> &CSI) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + SmallVector<CalleeSavedInfo, 8> RVVCSI; + + for (auto &CS : CSI) { + int FI = CS.getFrameIdx(); + if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) + RVVCSI.push_back(CS); + } + + return RVVCSI; +} + void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -590,6 +605,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // directives. 
for (const auto &Entry : CSI) { int FrameIdx = Entry.getFrameIdx(); + if (FrameIdx >= 0 && + MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) + continue; + int64_t Offset = MFI.getObjectOffset(FrameIdx); Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( @@ -726,7 +745,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo()); - // Skip to before the restores of callee-saved registers + // Skip to before the restores of scalar callee-saved registers // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. auto LastFrameDestroy = MBBI; @@ -1029,15 +1048,24 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { MachineFrameInfo &MFI = MF.getFrameInfo(); // Create a buffer of RVV objects to allocate. SmallVector<int, 8> ObjectsToAllocate; - for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { - unsigned StackID = MFI.getStackID(I); - if (StackID != TargetStackID::ScalableVector) - continue; - if (MFI.isDeadObjectIndex(I)) - continue; + auto pushRVVObjects = [&](int FIBegin, int FIEnd) { + for (int I = FIBegin, E = FIEnd; I != E; ++I) { + unsigned StackID = MFI.getStackID(I); + if (StackID != TargetStackID::ScalableVector) + continue; + if (MFI.isDeadObjectIndex(I)) + continue; - ObjectsToAllocate.push_back(I); - } + ObjectsToAllocate.push_back(I); + } + }; + // First push RVV Callee Saved object, then push RVV stack object + std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo(); + const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI); + if (!RVVCSI.empty()) + pushRVVObjects(RVVCSI[0].getFrameIdx(), + RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1); + pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size()); // The minimum alignment is 16 bytes. 
Align RVVStackAlign(16); @@ -1487,13 +1515,19 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( // Manually spill values not spilled by libcall & Push/Pop. const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); - for (auto &CS : UnmanagedCSI) { - // Insert the spill to the stack frame. - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), - RC, TRI, Register()); - } + const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); + + auto storeRegToStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { + for (auto &CS : CSInfo) { + // Insert the spill to the stack frame. + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), + CS.getFrameIdx(), RC, TRI, Register()); + } + }; + storeRegToStackSlot(UnmanagedCSI); + storeRegToStackSlot(RVVCSI); return true; } @@ -1511,19 +1545,26 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( DL = MI->getDebugLoc(); // Manually restore values not restored by libcall & Push/Pop. - // Keep the same order as in the prologue. There is no need to reverse the - // order in the epilogue. In addition, the return address will be restored - // first in the epilogue. It increases the opportunity to avoid the - // load-to-use data hazard between loading RA and return by RA. - // loadRegFromStackSlot can insert multiple instructions. + // Reverse the restore order in epilog. In addition, the return + // address will be restored first in the epilogue. It increases + // the opportunity to avoid the load-to-use data hazard between + // loading RA and return by RA. loadRegFromStackSlot can insert + // multiple instructions. 
const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); - for (auto &CS : UnmanagedCSI) { - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); - assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); - } + const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); + + auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { + for (auto &CS : CSInfo) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, + Register()); + assert(MI != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + } + }; + loadRegFromStackSlot(RVVCSI); + loadRegFromStackSlot(UnmanagedCSI); RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); if (RVFI->isPushable(*MF)) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ca78648c..564fda6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -18724,6 +18724,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( case CallingConv::Fast: case CallingConv::SPIR_KERNEL: case CallingConv::GRAAL: + case CallingConv::RISCV_VectorCall: break; case CallingConv::GHC: if (Subtarget.isRVE()) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 74d6532..11c3f2d 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -71,6 +71,9 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : CSR_Interrupt_SaveList; } + bool HasVectorCSR = + MF->getFunction().getCallingConv() == CallingConv::RISCV_VectorCall; + switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); @@ -79,12 +82,18 @@ 
RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_ILP32E_LP64E_SaveList; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: + if (HasVectorCSR) + return CSR_ILP32_LP64_V_SaveList; return CSR_ILP32_LP64_SaveList; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: + if (HasVectorCSR) + return CSR_ILP32F_LP64F_V_SaveList; return CSR_ILP32F_LP64F_SaveList; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: + if (HasVectorCSR) + return CSR_ILP32D_LP64D_V_SaveList; return CSR_ILP32D_LP64D_SaveList; } } @@ -665,12 +674,18 @@ RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF, return CSR_ILP32E_LP64E_RegMask; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32_LP64_V_RegMask; return CSR_ILP32_LP64_RegMask; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32F_LP64F_V_RegMask; return CSR_ILP32F_LP64F_RegMask; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32D_LP64D_V_RegMask; return CSR_ILP32D_LP64D_RegMask; } } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 000d01b..38cdf3c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -909,23 +909,33 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (!IsTypeLegal) return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst); + int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - // FIXME: Need to consider vsetvli and lmul. 
int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) - (int)Log2_32(Src->getScalarSizeInBits()); switch (ISD) { case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - if (Src->getScalarSizeInBits() == 1) { + case ISD::ZERO_EXTEND: { + const unsigned SrcEltSize = Src->getScalarSizeInBits(); + if (SrcEltSize == 1) { // We do not use vsext/vzext to extend from mask vector. // Instead we use the following instructions to extend from mask vector: // vmv.v.i v8, 0 // vmerge.vim v8, v8, -1, v0 - return 2; + return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM}, + DstLT.second, CostKind); } - return 1; + if ((PowDiff < 1) || (PowDiff > 3)) + return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8}; + unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8}; + unsigned Op = + (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1]; + return getRISCVInstructionCost(Op, DstLT.second, CostKind); + } case ISD::TRUNCATE: if (Dst->getScalarSizeInBits() == 1) { // We do not use several vncvt to truncate to mask vector. So we could diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9bad38f..9d98d31 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -43995,6 +43995,50 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, Extract->getOperand(1)); } +// If this extract is from a loaded vector value and will be used as an +// integer, that requires a potentially expensive XMM -> GPR transfer. +// Additionally, if we can convert to a scalar integer load, that will likely +// be folded into a subsequent integer op. +// Note: SrcVec might not have a VecVT type, but it must be the same size. +// Note: Unlike the related fold for this in DAGCombiner, this is not limited +// to a single-use of the loaded vector. 
For the reasons above, we +// expect this to be profitable even if it creates an extra load. +static SDValue +combineExtractFromVectorLoad(SDNode *N, EVT VecVT, SDValue SrcVec, uint64_t Idx, + const SDLoc &dl, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + "Only EXTRACT_VECTOR_ELT supported so far"); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = N->getValueType(0); + + bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) { + return Use->getOpcode() == ISD::STORE || + Use->getOpcode() == ISD::INSERT_VECTOR_ELT || + Use->getOpcode() == ISD::SCALAR_TO_VECTOR; + }); + + auto *LoadVec = dyn_cast<LoadSDNode>(SrcVec); + if (LoadVec && ISD::isNormalLoad(LoadVec) && VT.isInteger() && + VecVT.getVectorElementType() == VT && + VecVT.getSizeInBits() == SrcVec.getValueSizeInBits() && + DCI.isAfterLegalizeDAG() && !LikelyUsedAsVector && LoadVec->isSimple()) { + SDValue NewPtr = TLI.getVectorElementPointer( + DAG, LoadVec->getBasePtr(), VecVT, DAG.getVectorIdxConstant(Idx, dl)); + unsigned PtrOff = VT.getSizeInBits() * Idx / 8; + MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff); + Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff); + SDValue Load = + DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment, + LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo()); + DAG.makeEquivalentMemoryOrdering(LoadVec, Load); + return Load; + } + + return SDValue(); +} + // Attempt to peek through a target shuffle and extract the scalar from the // source. 
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, @@ -44191,6 +44235,11 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx)) return DAG.getZExtOrTrunc(V, dl, VT); + if (N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && ExtractVT == SrcVT) + if (SDValue V = combineExtractFromVectorLoad( + N, SrcVT, peekThroughBitcasts(SrcOp), ExtractIdx, dl, DAG, DCI)) + return V; + return SDValue(); } @@ -44600,6 +44649,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget)) return V; + if (CIdx) + if (SDValue V = combineExtractFromVectorLoad( + N, InputVector.getValueType(), InputVector, CIdx->getZExtValue(), + dl, DAG, DCI)) + return V; + // Attempt to extract a i1 element by using MOVMSK to extract the signbits // and then testing the relevant element. // @@ -44645,34 +44700,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } - // If this extract is from a loaded vector value and will be used as an - // integer, that requires a potentially expensive XMM -> GPR transfer. - // Additionally, if we can convert to a scalar integer load, that will likely - // be folded into a subsequent integer op. - // Note: Unlike the related fold for this in DAGCombiner, this is not limited - // to a single-use of the loaded vector. For the reasons above, we - // expect this to be profitable even if it creates an extra load. 
- bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) { - return Use->getOpcode() == ISD::STORE || - Use->getOpcode() == ISD::INSERT_VECTOR_ELT || - Use->getOpcode() == ISD::SCALAR_TO_VECTOR; - }); - auto *LoadVec = dyn_cast<LoadSDNode>(InputVector); - if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() && - SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() && - !LikelyUsedAsVector && LoadVec->isSimple()) { - SDValue NewPtr = - TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx); - unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8; - MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff); - Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff); - SDValue Load = - DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment, - LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo()); - DAG.makeEquivalentMemoryOrdering(LoadVec, Load); - return Load; - } - return SDValue(); } @@ -48273,7 +48300,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG, // We do not split for SSE at all, but we need to split vectors for AVX1 and // AVX2. - if (!Subtarget.useAVX512Regs() && VT.is512BitVector() && + if (!Subtarget.useAVX512Regs() && VT.is512BitVector() && TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) { SDValue LoX, HiX; std::tie(LoX, HiX) = splitVector(X, DAG, DL); diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index e36832f..7109946 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -186,11 +186,6 @@ void AArch64::ExtensionSet::enable(ArchExtKind E) { // Special cases for dependencies which vary depending on the base // architecture version. 
if (BaseArch) { - // +sve implies +f32mm if the base architecture is v8.6A+ or v9.1A+ - // It isn't the case in general that sve implies both f64mm and f32mm - if (E == AEK_SVE && BaseArch->is_superset(ARMV8_6A)) - enable(AEK_F32MM); - // +fp16 implies +fp16fml for v8.4A+, but not v9.0-A+ if (E == AEK_FP16 && BaseArch->is_superset(ARMV8_4A) && !BaseArch->is_superset(ARMV9A)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index af238a4..8c698e5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -611,6 +611,18 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) { Y->getType() == Z->getType()) return createPowiExpr(I, *this, X, Y, Z); + // powi(X, Y) / X --> powi(X, Y-1) + // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan + // are required. + // TODO: Multi-use may be also better off creating Powi(x,y-1) + if (I.hasAllowReassoc() && I.hasNoNaNs() && + match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>( + m_Specific(Op1), m_Value(Y))))) && + willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) { + Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType()); + return createPowiExpr(I, *this, Op1, Y, NegOne); + } + return nullptr; } @@ -1904,20 +1916,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { return replaceInstUsesWith(I, Pow); } - // powi(X, Y) / X --> powi(X, Y-1) - // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan - // are required. 
- // TODO: Multi-use may be also better off creating Powi(x,y-1) - if (I.hasAllowReassoc() && I.hasNoNaNs() && - match(Op0, m_OneUse(m_Intrinsic<Intrinsic::powi>(m_Specific(Op1), - m_Value(Y)))) && - willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) { - Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType()); - Value *Y1 = Builder.CreateAdd(Y, NegOne); - Type *Types[] = {Op1->getType(), Y1->getType()}; - Value *Pow = Builder.CreateIntrinsic(Intrinsic::powi, Types, {Op1, Y1}, &I); - return replaceInstUsesWith(I, Pow); - } + if (Instruction *FoldedPowi = foldPowiReassoc(I)) + return FoldedPowi; return nullptr; } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fbf1cb6..e1f26b9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11926,7 +11926,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Builder.SetCurrentDebugLocation(PH->getDebugLoc()); Value *Vec = vectorizeOperand(E, I, /*PostponedPHIs=*/true); if (VecTy != Vec->getType()) { - assert((getOperandEntry(E, I)->State == TreeEntry::NeedToGather || + assert((It != MinBWs.end() || + getOperandEntry(E, I)->State == TreeEntry::NeedToGather || MinBWs.contains(getOperandEntry(E, I))) && "Expected item in MinBWs."); Vec = Builder.CreateIntCast(Vec, VecTy, GetOperandSignedness(I)); diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll index bd26c19..14da9a3 100644 --- a/llvm/test/Analysis/CostModel/RISCV/cast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll @@ -16,74 +16,74 @@ define void @sext() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = sext <4 x i16> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = sext <4 x i1> undef to <4 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = sext <4 x i1> undef to <4 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32> -; RV32-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = sext <8 x i1> undef to <8 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = sext <8 x i1> undef to <8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = 
sext <16 x i32> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = sext <16 x i1> undef to <16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x 
i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64> -; RV32-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = sext 
<256 x i32> undef to <256 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = sext 
<32 x i1> undef to <32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 
= sext <128 x i16> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64> ; RV32-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64> @@ -96,73 +96,73 @@ define void @sext() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = sext <vscale x 2 x i16> undef to <vscale x 2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = sext <vscale x 2 x i1> undef to <vscale x 2 x i8> ; 
RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = sext <vscale x 2 x i1> undef to <vscale x 2 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = sext <vscale x 4 x i1> undef to <vscale x 4 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = sext <vscale x 4 x i1> undef to <vscale x 4 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16> +; RV32-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = sext <vscale x 8 x i1> undef to <vscale x 8 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x 
i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x 
i1> undef to <vscale x 16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for 
instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32> ; RV32-NEXT: Cost Model: 
Invalid cost for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> undef to <vscale x 128 x i128> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = sext <vscale x 128 x i16> undef to <vscale x 128 x i128> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = sext <vscale x 128 x i32> undef to <vscale x 128 x i128> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 
128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128> ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -179,74 +179,74 @@ define void @sext() { ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = sext <4 x i16> undef to <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = sext <4 x i1> undef to <4 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = sext <4 x i1> undef to <4 x 
i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = sext <8 x i1> undef to <8 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = sext <8 x i1> undef to <8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 
= sext <8 x i1> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = sext <16 x i1> undef to <16 x i8> 
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64> -; RV64-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32> -; 
RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = sext <32 x 
i8> undef to <32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to 
<64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for 
instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64> @@ -259,73 +259,73 @@ define void @sext() { ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale 
x 2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = sext <vscale x 2 x i16> undef to <vscale x 2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = sext <vscale x 2 x i1> undef to <vscale x 2 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = sext <vscale x 2 x i1> undef to <vscale x 2 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32> -; 
RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = sext <vscale x 4 x i1> undef to <vscale x 4 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = sext <vscale x 4 x i1> undef to <vscale x 4 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64> -; RV64-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = sext <vscale x 8 x i1> undef to <vscale x 8 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32> -; RV64-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64> -; 
RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = sext <vscale 
x 64 x i1> undef to <vscale x 64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for 
instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32> +; RV64-NEXT: 
Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> 
undef to <vscale x 128 x i128> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = sext <vscale x 128 x i16> undef to <vscale x 128 x i128> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = sext <vscale x 128 x i32> undef to <vscale x 128 x i128> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128> ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -522,74 +522,74 @@ define void @zext() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16> 
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = zext <4 x i16> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = zext <4 x i1> undef to <4 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = zext <4 x i1> undef to <4 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = zext 
<8 x i16> undef to <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = zext <8 x i1> undef to <8 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = zext <8 x i1> undef to <8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = zext <16 x i1> undef to <16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: 
%v64i1_v64i64 = zext <64 x i1> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64> -; RV32-NEXT: Cost Model: 
Found an estimated cost of 24 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16> +; RV32-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32> +; RV32-NEXT: Cost 
Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 270 for instruction: 
%v256i1_v256i64 = zext <256 x i1> undef to <256 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64> @@ -602,73 +602,73 @@ define void @zext() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = zext <vscale x 2 x i16> undef to <vscale x 2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %nxv2i1_nxv2i8 = zext <vscale x 2 x i1> undef to <vscale x 2 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = zext <vscale x 2 x i1> undef to <vscale x 2 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = zext <vscale x 4 x i1> undef to <vscale x 4 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = zext <vscale x 4 x i1> undef to <vscale x 4 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = 
zext <vscale x 8 x i8> undef to <vscale x 8 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = zext <vscale x 8 x i1> undef to <vscale x 8 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = 
zext <vscale x 16 x i16> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 
for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8> +; RV32-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to 
<vscale x 64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = 
zext <vscale x 64 x i1> undef to <vscale x 64 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = zext <vscale x 128 x i16> undef to <vscale x 128 x i128> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = zext <vscale x 128 x i32> undef to <vscale x 128 x i128> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for 
instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32> ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128> ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -685,74 +685,74 @@ define void @zext() { ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = zext <4 x i16> undef to <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = zext <4 x i1> undef to <4 x i8> ; RV64-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v4i1_v4i16 = zext <4 x i1> undef to <4 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = zext <8 x i1> undef to <8 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = zext <8 x i1> undef to <8 x i16> -; 
RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64> ; RV64-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v16i1_v16i8 = zext <16 x i1> undef to <16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated 
cost of 11 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16> +; RV64-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8> +; RV64-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = zext <256 x i8> 
undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64> @@ -765,73 +765,73 @@ define void @zext() { ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = zext <vscale x 2 x i16> undef to <vscale x 2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = zext <vscale x 2 x i1> undef to <vscale x 2 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = zext <vscale x 2 x i1> undef to <vscale x 2 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = zext <vscale x 4 x i1> undef to <vscale x 4 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = zext <vscale x 4 x i1> undef to <vscale x 4 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = 
zext <vscale x 8 x i8> undef to <vscale x 8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = zext <vscale x 8 x i1> undef to <vscale x 8 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 
x i1> undef to <vscale x 8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 
16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 
= zext <vscale x 32 x i1> undef to <vscale x 32 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64> +; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32> ; RV64-NEXT: Cost Model: Invalid 
cost for instruction: %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = zext <vscale x 128 x i16> undef to <vscale x 128 x i128> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = zext <vscale x 128 x i32> undef to <vscale x 128 x i128> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32> ; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128> ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll index 80efe912..30cb32c 100644 --- 
a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll @@ -1141,7 +1141,7 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) { define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; @@ -1157,7 +1157,7 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) { define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; @@ -1445,7 +1445,7 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) { define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 
@llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; @@ -1461,7 +1461,7 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) { define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; @@ -1597,7 +1597,7 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) { define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; @@ -1613,7 +1613,7 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) { define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 
@llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll index 225bad6..aa7a90b 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll @@ -12,12 +12,12 @@ define void @extractelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x 
i1> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0 @@ -66,12 +66,12 @@ define void @extractelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1 @@ -120,12 +120,12 @@ define void @extractelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%v2i8_x = extractelement <2 x i8> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x @@ -177,12 +177,12 @@ define void @extractelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0 ; 
RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0 @@ -231,12 +231,12 @@ define void @extractelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1 @@ -285,12 +285,12 @@ define void @extractelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = 
extractelement <4 x i8> undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x @@ -341,13 +341,13 @@ define void @extractelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: 
%nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0 @@ -395,13 +395,13 @@ define void @extractelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1 @@ -449,13 +449,13 @@ define void @extractelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_x = extractelement <2 x i1> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x @@ -506,13 +506,13 @@ define void @extractelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0 @@ -560,13 +560,13 @@ define void @extractelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 +; RV64ZVE64X-NEXT: Cost 
Model: Found an estimated cost of 6 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1 @@ -614,13 +614,13 @@ define void @extractelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_x = extractelement <2 x i1> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x 
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll index 
5387c8d..6e1ae02 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll @@ -12,12 +12,12 @@ define void @insertelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV32V-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -66,12 +66,12 @@ define void @insertelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -120,12 +120,12 @@ define void @insertelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> 
undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x @@ -177,12 +177,12 @@ define void @insertelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 +; RV64V-NEXT: Cost Model: Found 
an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -231,12 +231,12 @@ define void @insertelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = 
insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -285,12 +285,12 @@ define void @insertelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, 
i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x @@ -341,13 +341,13 @@ define void @insertelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -395,13 +395,13 @@ define void @insertelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: 
Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -449,13 +449,13 @@ define void @insertelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 
%x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x @@ -506,13 
+506,13 @@ define void @insertelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> 
undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -560,13 +560,13 @@ define void @insertelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 
undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -614,13 +614,13 @@ define void @insertelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 
undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll index 46bf315..b763198 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll @@ -197,7 +197,7 @@ define void @broadcast_fixed() #0{ ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = shufflevector <32 x i1> undef, <32 x i1> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = shufflevector <64 x i1> undef, <64 x i1> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %43 = shufflevector <128 x i1> undef, <128 x i1> undef, <128 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = shufflevector <128 x i1> %ins1, <128 x i1> poison, <128 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ins2 = insertelement <2 x i8> poison, i8 3, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = shufflevector <2 x i8> %ins2, <2 x i8> undef, <2 x i32> zeroinitializer diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll index b1f4abf..2190e2f 100644 --- a/llvm/test/Bitcode/compatibility-3.6.ll +++ b/llvm/test/Bitcode/compatibility-3.6.ll @@ -1061,16 +1061,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1178,11 +1178,11 @@ define void @intrinsics.codegen() { ; CHECK: attributes #27 = { uwtable } ; CHECK: attributes #28 = { "cpu"="cortex-a8" } ; CHECK: attributes #29 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #31 = { nounwind memory(argmem: read) } -; CHECK: attributes #32 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #34 = { nocallback nounwind } +; CHECK: attributes #30 = { nounwind memory(argmem: read) } +; CHECK: attributes #31 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #33 = { nocallback nounwind } +; CHECK: attributes #34 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #36 = { builtin } diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll index 91e55f6..7e59b5c 100644 --- a/llvm/test/Bitcode/compatibility-3.7.ll +++ b/llvm/test/Bitcode/compatibility-3.7.ll @@ -1092,16 +1092,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1241,11 +1241,11 @@ define void @misc.metadata() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #34 = { nounwind memory(argmem: read) } -; CHECK: attributes #35 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #37 = { nocallback nounwind } +; CHECK: attributes #33 = { nounwind memory(argmem: read) } +; CHECK: attributes #34 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #36 = { nocallback nounwind } +; CHECK: attributes #37 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #39 = { builtin } diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll index aa4d8b1..ebd1f2f 100644 --- a/llvm/test/Bitcode/compatibility-3.8.ll +++ b/llvm/test/Bitcode/compatibility-3.8.ll @@ -1247,16 +1247,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1551,11 +1551,11 @@ normal: ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { nounwind memory(argmem: read) } -; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #36 = { nounwind memory(argmem: read) } +; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #39 = { nocallback nounwind } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #42 = { builtin } diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll index e3c84f6..c34f04c 100644 --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -1318,16 +1318,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1624,11 +1624,11 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { nounwind memory(argmem: read) } -; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #36 = { nounwind memory(argmem: read) } +; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #39 = { nocallback nounwind } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #41 = { memory(write) } ; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #43 = { builtin } diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll index 06cb8420..05bffda 100644 --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -1318,16 +1318,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1649,11 +1649,11 @@ define i8** @constexpr() { ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { nounwind memory(argmem: read) } -; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #36 = { nounwind memory(argmem: read) } +; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #39 = { nocallback nounwind } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #41 = { memory(write) } ; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #43 = { builtin } diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll index f9ae558..0c87228 100644 --- a/llvm/test/Bitcode/compatibility-5.0.ll +++ b/llvm/test/Bitcode/compatibility-5.0.ll @@ -1330,16 +1330,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1664,11 +1664,11 @@ define i8** @constexpr() { ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { nounwind memory(argmem: read) } -; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #36 = { nounwind memory(argmem: read) } +; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #39 = { nocallback nounwind } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #41 = { memory(write) } ; CHECK: attributes #42 = { speculatable } ; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll index 1458e1b..44c6808 100644 --- a/llvm/test/Bitcode/compatibility-6.0.ll +++ b/llvm/test/Bitcode/compatibility-6.0.ll @@ -1340,16 +1340,16 @@ define void @instructions.va_arg(i8* %v, ...) 
{ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - ; CHECK: call void @llvm.va_start(ptr %ap2) + ; CHECK: call void @llvm.va_start.p0(ptr %ap2) va_arg i8* %ap2, i32 ; CHECK: va_arg ptr %ap2, i32 call void @llvm.va_copy(i8* %v, i8* %ap2) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2) call void @llvm.va_end(i8* %ap2) - ; CHECK: call void @llvm.va_end(ptr %ap2) + ; CHECK: call void @llvm.va_end.p0(ptr %ap2) ret void } @@ -1674,11 +1674,11 @@ define i8** @constexpr() { ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { nounwind memory(argmem: read) } -; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #36 = { nounwind memory(argmem: read) } +; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #39 = { nocallback nounwind } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #41 = { memory(write) } ; CHECK: attributes #42 = { speculatable } ; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index fa8b052..b374924 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1648,16 +1648,16 @@ define void @instructions.va_arg(ptr %v, ...) 
{ %ap = alloca ptr call void @llvm.va_start(ptr %ap) - ; CHECK: call void @llvm.va_start(ptr %ap) + ; CHECK: call void @llvm.va_start.p0(ptr %ap) va_arg ptr %ap, i32 ; CHECK: va_arg ptr %ap, i32 call void @llvm.va_copy(ptr %v, ptr %ap) - ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap) + ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap) call void @llvm.va_end(ptr %ap) - ; CHECK: call void @llvm.va_end(ptr %ap) + ; CHECK: call void @llvm.va_end.p0(ptr %ap) ret void } @@ -2091,12 +2091,12 @@ define float @nofpclass_callsites(float %arg) { ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { nounwind memory(argmem: read) } -; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } -; CHECK: attributes #40 = { nocallback nounwind } -; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #36 = { nounwind memory(argmem: read) } +; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #39 = { nocallback nounwind } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #42 = { memory(write) } ; CHECK: attributes #43 = { speculatable } ; CHECK: attributes #44 = { strictfp } diff --git a/llvm/test/Bitcode/thinlto-function-summary.ll b/llvm/test/Bitcode/thinlto-function-summary.ll index 799759e..13c6611 100644 --- 
a/llvm/test/Bitcode/thinlto-function-summary.ll +++ b/llvm/test/Bitcode/thinlto-function-summary.ll @@ -13,9 +13,9 @@ ; "variadic" ; BC-NEXT: <FUNCTION op0=46 op1=8 ; "llvm.va_start" -; BC-NEXT: <FUNCTION op0=54 op1=13 +; BC-NEXT: <FUNCTION op0=54 op1=16 ; "f" -; BC-NEXT: <ALIAS op0=67 op1=1 +; BC-NEXT: <ALIAS op0=70 op1=1 ; BC: <GLOBALVAL_SUMMARY_BLOCK ; BC-NEXT: <VERSION ; BC-NEXT: <FLAGS @@ -26,7 +26,7 @@ ; BC-NEXT: <ALIAS {{.*}} op0=6 op1=0 op2=3 ; BC-NEXT: </GLOBALVAL_SUMMARY_BLOCK ; BC: <STRTAB_BLOCK -; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicllvm.va_startf{{.*}}' +; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicllvm.va_start.p{{[0-9]+}}f{{.*}}' ; RUN: opt -passes=name-anon-globals -module-summary < %s | llvm-dis | FileCheck %s diff --git a/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll b/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll index fad7b8e..fd3f500 100644 --- a/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll +++ b/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll @@ -10,7 +10,7 @@ define i32 @varArgIntrinsic(i32 %X, ...) { %ap = alloca i8* %ap2 = bitcast i8** %ap to i8* -; CHECK: call void @llvm.va_start(ptr %ap2) +; CHECK: call void @llvm.va_start.p0(ptr %ap2) call void @llvm.va_start(i8* %ap2) ; CHECK-NEXT: %tmp = va_arg ptr %ap, i32 @@ -19,12 +19,12 @@ define i32 @varArgIntrinsic(i32 %X, ...) 
{ %aq = alloca i8* %aq2 = bitcast i8** %aq to i8* -; CHECK: call void @llvm.va_copy(ptr %aq2, ptr %ap2) +; CHECK: call void @llvm.va_copy.p0(ptr %aq2, ptr %ap2) call void @llvm.va_copy(i8* %aq2, i8* %ap2) -; CHECK-NEXT: call void @llvm.va_end(ptr %aq2) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr %aq2) call void @llvm.va_end(i8* %aq2) -; CHECK-NEXT: call void @llvm.va_end(ptr %ap2) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr %ap2) call void @llvm.va_end(i8* %ap2) ret i32 %tmp } diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll index 089e171..c9fd2d3 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll @@ -518,4 +518,6 @@ attributes #5 = { nobuiltin } !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} !3 = !{!"short", !1} -!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3} +!4 = !{i64 0, i64 4, !5, i64 4, i64 2, !6, i64 8, i64 4, !5, i64 12, i64 2, !6, i64 16, i64 4, !5, i64 20, i64 2, !6} +!5 = !{!0, !0, i64 0} +!6 = !{!3, !3, i64 0} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll index 942f459..8ddaf24 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll @@ -808,7 +808,7 @@ define float @test_pown_fast_f32_nobuiltin(float %x, i32 %y) { ; CHECK-LABEL: define float @test_pown_fast_f32_nobuiltin ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @_Z4pownfi(float [[X]], i32 [[Y]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @_Z4pownfi(float [[X]], i32 [[Y]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: ret float [[CALL]] ; entry: @@ -820,11 +820,11 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 { ; CHECK-LABEL: define float 
@test_pown_fast_f32_strictfp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float -; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]] -; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) #[[ATTR0]] +; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) #[[ATTR0]] +; CHECK-NEXT: [[POWNI2F:%.*]] = call fast float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[Y]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[__YLOGX:%.*]] = call fast float @llvm.experimental.constrained.fmul.f32(float [[POWNI2F]], float [[__LOG2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) #[[ATTR0]] ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll index 2ffa647..2e64a34 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll @@ -896,7 +896,7 @@ define float @test_rootn_f32__y_neg2__strictfp(float %x) #1 { ; CHECK-LABEL: define float @test_rootn_f32__y_neg2__strictfp( ; CHECK-SAME: float [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]]) +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]]) #[[ATTR0]] ; CHECK-NEXT: ret float [[__ROOTN2RSQRT]] ; 
entry: diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll index 76ec1cc..99d02ff 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll @@ -358,65 +358,6 @@ define amdgpu_gfx i32 @global_atomic_xchg_i32_ret_offset_scalar(ptr addrspace(1) ; --------------------------------------------------------------------- define void @global_atomic_xchg_f32_noret(ptr addrspace(1) %ptr, float %in) { -; GCN1-LABEL: global_atomic_xchg_f32_noret: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_load_dword v3, v[0:1] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB0_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: v_mov_b32_e32 v3, v4 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB0_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_noret: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_load_dword v3, v[0:1] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB0_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: v_mov_b32_e32 v3, v4 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB0_1 -; 
GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_noret: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v3, v[0:1] -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB0_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB0_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_noret: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -450,69 +391,6 @@ define void @global_atomic_xchg_f32_noret(ptr addrspace(1) %ptr, float %in) { } define void @global_atomic_xchg_f32_noret_offset(ptr addrspace(1) %out, float %in) { -; GCN1-LABEL: global_atomic_xchg_f32_noret_offset: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_add_f32_e32 v0, vcc, 16, v0 -; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: global_load_dword v3, v[0:1] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB1_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: v_mov_b32_e32 v3, v4 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB1_1 -; GCN1-NEXT: ; %bb.2: ; 
%atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_noret_offset: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 -; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: global_load_dword v3, v[0:1] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB1_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: v_mov_b32_e32 v3, v4 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB1_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_noret_offset: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v3, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB1_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB1_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_noret_offset: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -549,71 +427,6 @@ define void @global_atomic_xchg_f32_noret_offset(ptr 
addrspace(1) %out, float %i } define float @global_atomic_xchg_f32_ret(ptr addrspace(1) %ptr, float %in) { -; GCN1-LABEL: global_atomic_xchg_f32_ret: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_load_dword v4, v[0:1] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB2_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v3, v4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB2_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: v_mov_b32_e32 v0, v4 -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_ret: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_load_dword v4, v[0:1] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB2_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v3, v4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB2_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: v_mov_b32_e32 v0, v4 -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_ret: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v4, 
v[0:1] -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB2_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB2_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v0, v4 -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_ret: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -648,73 +461,6 @@ define float @global_atomic_xchg_f32_ret(ptr addrspace(1) %ptr, float %in) { } define float @global_atomic_xchg_f32_ret_offset(ptr addrspace(1) %out, float %in) { -; GCN1-LABEL: global_atomic_xchg_f32_ret_offset: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_add_f32_e32 v4, vcc, 16, v0 -; GCN1-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; GCN1-NEXT: global_load_dword v0, v[4:5] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB3_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v3, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB3_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_ret_offset: -; GCN2: ; 
%bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_add_u32_e32 v4, vcc, 16, v0 -; GCN2-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; GCN2-NEXT: global_load_dword v0, v[4:5] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB3_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v3, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB3_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_ret_offset: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v4, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB3_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB3_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v0, v4 -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_ret_offset: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -752,80 +498,6 @@ define float @global_atomic_xchg_f32_ret_offset(ptr addrspace(1) %out, float %in } define amdgpu_gfx 
void @global_atomic_xchg_f32_noret_scalar(ptr addrspace(1) inreg %ptr, float inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f32_noret_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v0, s4 -; GCN1-NEXT: v_mov_b32_e32 v1, s5 -; GCN1-NEXT: global_load_dword v1, v[0:1] -; GCN1-NEXT: s_mov_b64 s[34:35], 0 -; GCN1-NEXT: .LBB4_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: v_mov_b32_e32 v0, s6 -; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN1-NEXT: v_mov_b32_e32 v1, v0 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_cbranch_execnz .LBB4_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_noret_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v0, s4 -; GCN2-NEXT: v_mov_b32_e32 v1, s5 -; GCN2-NEXT: global_load_dword v1, v[0:1] -; GCN2-NEXT: s_mov_b64 s[34:35], 0 -; GCN2-NEXT: .LBB4_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: v_mov_b32_e32 v0, s6 -; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN2-NEXT: v_mov_b32_e32 v1, v0 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_cbranch_execnz .LBB4_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 
exec, exec, s[34:35] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_noret_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v1, v[0:1] -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB4_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: v_mov_b32_e32 v0, s6 -; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: v_mov_b32_e32 v1, v0 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB4_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_noret_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -876,84 +548,6 @@ define amdgpu_gfx void @global_atomic_xchg_f32_noret_scalar(ptr addrspace(1) inr } define amdgpu_gfx void @global_atomic_xchg_f32_noret_offset_scalar(ptr addrspace(1) inreg %out, float inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f32_noret_offset_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: s_add_u32 s34, s4, 16 -; GCN1-NEXT: s_addc_u32 s35, s5, 0 -; GCN1-NEXT: v_mov_b32_e32 v0, s34 -; GCN1-NEXT: v_mov_b32_e32 v1, s35 -; GCN1-NEXT: global_load_dword v1, v[0:1] -; GCN1-NEXT: s_mov_b64 s[36:37], 0 -; GCN1-NEXT: .LBB5_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v2, s34 -; GCN1-NEXT: v_mov_b32_e32 v0, s6 -; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: 
global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN1-NEXT: v_mov_b32_e32 v1, v0 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_cbranch_execnz .LBB5_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_noret_offset_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: s_add_u32 s34, s4, 16 -; GCN2-NEXT: s_addc_u32 s35, s5, 0 -; GCN2-NEXT: v_mov_b32_e32 v0, s34 -; GCN2-NEXT: v_mov_b32_e32 v1, s35 -; GCN2-NEXT: global_load_dword v1, v[0:1] -; GCN2-NEXT: s_mov_b64 s[36:37], 0 -; GCN2-NEXT: .LBB5_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v2, s34 -; GCN2-NEXT: v_mov_b32_e32 v0, s6 -; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN2-NEXT: v_mov_b32_e32 v1, v0 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_cbranch_execnz .LBB5_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_noret_offset_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v1, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB5_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: v_mov_b32_e32 v0, s6 -; GCN3-NEXT: 
v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: v_mov_b32_e32 v1, v0 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB5_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_noret_offset_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1007,83 +601,6 @@ define amdgpu_gfx void @global_atomic_xchg_f32_noret_offset_scalar(ptr addrspace } define amdgpu_gfx float @global_atomic_xchg_f32_ret_scalar(ptr addrspace(1) inreg %ptr, float inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f32_ret_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v0, s4 -; GCN1-NEXT: v_mov_b32_e32 v1, s5 -; GCN1-NEXT: global_load_dword v0, v[0:1] -; GCN1-NEXT: s_mov_b64 s[34:35], 0 -; GCN1-NEXT: .LBB6_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v3, s4 -; GCN1-NEXT: v_mov_b32_e32 v1, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v2, v0 -; GCN1-NEXT: v_mov_b32_e32 v4, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_cbranch_execnz .LBB6_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_ret_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v0, s4 -; GCN2-NEXT: v_mov_b32_e32 v1, s5 -; GCN2-NEXT: global_load_dword v0, v[0:1] -; GCN2-NEXT: s_mov_b64 s[34:35], 0 -; GCN2-NEXT: .LBB6_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v3, s4 -; GCN2-NEXT: v_mov_b32_e32 v1, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v2, v0 -; GCN2-NEXT: v_mov_b32_e32 v4, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_cbranch_execnz .LBB6_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_ret_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v0, v[0:1] -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB6_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v3, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v2, v0 -; GCN3-NEXT: v_mov_b32_e32 v4, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB6_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: 
global_atomic_xchg_f32_ret_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1134,87 +651,6 @@ define amdgpu_gfx float @global_atomic_xchg_f32_ret_scalar(ptr addrspace(1) inre } define amdgpu_gfx float @global_atomic_xchg_f32_ret_offset_scalar(ptr addrspace(1) inreg %out, float inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f32_ret_offset_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: s_add_u32 s34, s4, 16 -; GCN1-NEXT: s_addc_u32 s35, s5, 0 -; GCN1-NEXT: v_mov_b32_e32 v0, s34 -; GCN1-NEXT: v_mov_b32_e32 v1, s35 -; GCN1-NEXT: global_load_dword v0, v[0:1] -; GCN1-NEXT: s_mov_b64 s[36:37], 0 -; GCN1-NEXT: .LBB7_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v3, s34 -; GCN1-NEXT: v_mov_b32_e32 v1, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v2, v0 -; GCN1-NEXT: v_mov_b32_e32 v4, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_cbranch_execnz .LBB7_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f32_ret_offset_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: s_add_u32 s34, s4, 16 -; GCN2-NEXT: s_addc_u32 s35, s5, 0 -; GCN2-NEXT: v_mov_b32_e32 v0, s34 -; GCN2-NEXT: v_mov_b32_e32 v1, s35 -; GCN2-NEXT: global_load_dword v0, v[0:1] -; GCN2-NEXT: s_mov_b64 s[36:37], 0 -; GCN2-NEXT: .LBB7_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v3, s34 -; GCN2-NEXT: v_mov_b32_e32 v1, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GCN2-NEXT: v_mov_b32_e32 v2, v0 -; GCN2-NEXT: v_mov_b32_e32 v4, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_cbranch_execnz .LBB7_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f32_ret_offset_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v0, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB7_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v3, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v2, v0 -; GCN3-NEXT: v_mov_b32_e32 v4, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB7_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f32_ret_offset_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll index d137f47..380ce7f 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll @@ -372,65 
+372,6 @@ define amdgpu_gfx i64 @global_atomic_xchg_i64_ret_offset_scalar(ptr addrspace(1) ; --------------------------------------------------------------------- define void @global_atomic_xchg_f64_noret(ptr addrspace(1) %ptr, double %in) { -; GCN1-LABEL: global_atomic_xchg_f64_noret: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_load_dword v3, v[0:1] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB0_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: v_mov_b32_e32 v3, v4 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB0_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_noret: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_load_dword v3, v[0:1] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB0_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: v_mov_b32_e32 v3, v4 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB0_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_noret: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v3, v[0:1] -; GCN3-NEXT: 
s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB0_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB0_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_noret: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -464,69 +405,6 @@ define void @global_atomic_xchg_f64_noret(ptr addrspace(1) %ptr, double %in) { } define void @global_atomic_xchg_f64_noret_offset(ptr addrspace(1) %out, double %in) { -; GCN1-LABEL: global_atomic_xchg_f64_noret_offset: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_add_f64_e32 v0, vcc, 16, v0 -; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: global_load_dword v3, v[0:1] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB1_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: v_mov_b32_e32 v3, v4 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB1_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_noret_offset: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 -; GCN2-NEXT: v_addc_u32_e32 
v1, vcc, 0, v1, vcc -; GCN2-NEXT: global_load_dword v3, v[0:1] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB1_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: v_mov_b32_e32 v3, v4 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB1_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_noret_offset: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v3, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB1_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB1_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_noret_offset: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -563,71 +441,6 @@ define void @global_atomic_xchg_f64_noret_offset(ptr addrspace(1) %out, double % } define double @global_atomic_xchg_f64_ret(ptr addrspace(1) %ptr, double %in) { -; GCN1-LABEL: global_atomic_xchg_f64_ret: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_load_dword v4, v[0:1] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 
-; GCN1-NEXT: .LBB2_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v3, v4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB2_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: v_mov_b32_e32 v0, v4 -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_ret: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_load_dword v4, v[0:1] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB2_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v3, v4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB2_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: v_mov_b32_e32 v0, v4 -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_ret: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v4, v[0:1] -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB2_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], 
v[2:3] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB2_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v0, v4 -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_ret: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -663,73 +476,6 @@ define double @global_atomic_xchg_f64_ret(ptr addrspace(1) %ptr, double %in) { } define double @global_atomic_xchg_f64_ret_offset(ptr addrspace(1) %out, double %in) { -; GCN1-LABEL: global_atomic_xchg_f64_ret_offset: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_add_f64_e32 v4, vcc, 16, v0 -; GCN1-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; GCN1-NEXT: global_load_dword v0, v[4:5] -; GCN1-NEXT: s_mov_b64 s[4:5], 0 -; GCN1-NEXT: .LBB3_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v3, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_cbranch_execnz .LBB3_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_ret_offset: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_add_u32_e32 v4, vcc, 16, v0 -; GCN2-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; GCN2-NEXT: global_load_dword v0, v[4:5] -; GCN2-NEXT: s_mov_b64 s[4:5], 0 -; GCN2-NEXT: .LBB3_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner 
Loop Header: Depth=1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v3, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_cbranch_execnz .LBB3_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_ret_offset: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_load_dword v4, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[4:5], 0 -; GCN3-NEXT: .LBB3_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v3, v4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GCN3-NEXT: s_cbranch_execnz .LBB3_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN3-NEXT: v_mov_b32_e32 v0, v4 -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_ret_offset: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -768,80 +514,6 @@ define double @global_atomic_xchg_f64_ret_offset(ptr addrspace(1) %out, double % } define amdgpu_gfx void @global_atomic_xchg_f64_noret_scalar(ptr addrspace(1) inreg %ptr, double inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f64_noret_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v0, s4 -; GCN1-NEXT: v_mov_b32_e32 v1, s5 -; GCN1-NEXT: 
global_load_dword v1, v[0:1] -; GCN1-NEXT: s_mov_b64 s[34:35], 0 -; GCN1-NEXT: .LBB4_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: v_mov_b32_e32 v0, s6 -; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN1-NEXT: v_mov_b32_e32 v1, v0 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_cbranch_execnz .LBB4_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_noret_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v0, s4 -; GCN2-NEXT: v_mov_b32_e32 v1, s5 -; GCN2-NEXT: global_load_dword v1, v[0:1] -; GCN2-NEXT: s_mov_b64 s[34:35], 0 -; GCN2-NEXT: .LBB4_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: v_mov_b32_e32 v0, s6 -; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN2-NEXT: v_mov_b32_e32 v1, v0 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_cbranch_execnz .LBB4_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_noret_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v1, v[0:1] -; 
GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB4_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: v_mov_b32_e32 v0, s6 -; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: v_mov_b32_e32 v1, v0 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB4_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_noret_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -896,84 +568,6 @@ define amdgpu_gfx void @global_atomic_xchg_f64_noret_scalar(ptr addrspace(1) inr } define amdgpu_gfx void @global_atomic_xchg_f64_noret_offset_scalar(ptr addrspace(1) inreg %out, double inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f64_noret_offset_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: s_add_u32 s34, s4, 16 -; GCN1-NEXT: s_addc_u32 s35, s5, 0 -; GCN1-NEXT: v_mov_b32_e32 v0, s34 -; GCN1-NEXT: v_mov_b32_e32 v1, s35 -; GCN1-NEXT: global_load_dword v1, v[0:1] -; GCN1-NEXT: s_mov_b64 s[36:37], 0 -; GCN1-NEXT: .LBB5_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v2, s34 -; GCN1-NEXT: v_mov_b32_e32 v0, s6 -; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN1-NEXT: v_mov_b32_e32 v1, v0 -; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN1-NEXT: 
s_cbranch_execnz .LBB5_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_noret_offset_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: s_add_u32 s34, s4, 16 -; GCN2-NEXT: s_addc_u32 s35, s5, 0 -; GCN2-NEXT: v_mov_b32_e32 v0, s34 -; GCN2-NEXT: v_mov_b32_e32 v1, s35 -; GCN2-NEXT: global_load_dword v1, v[0:1] -; GCN2-NEXT: s_mov_b64 s[36:37], 0 -; GCN2-NEXT: .LBB5_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v2, s34 -; GCN2-NEXT: v_mov_b32_e32 v0, s6 -; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN2-NEXT: v_mov_b32_e32 v1, v0 -; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_cbranch_execnz .LBB5_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_noret_offset_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v1, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB5_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: v_mov_b32_e32 v0, s6 -; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: 
v_mov_b32_e32 v1, v0 -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB5_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_noret_offset_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1029,83 +623,6 @@ define amdgpu_gfx void @global_atomic_xchg_f64_noret_offset_scalar(ptr addrspace } define amdgpu_gfx double @global_atomic_xchg_f64_ret_scalar(ptr addrspace(1) inreg %ptr, double inreg %in) { -; GCN1-LABEL: global_atomic_xchg_f64_ret_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v0, s4 -; GCN1-NEXT: v_mov_b32_e32 v1, s5 -; GCN1-NEXT: global_load_dword v0, v[0:1] -; GCN1-NEXT: s_mov_b64 s[34:35], 0 -; GCN1-NEXT: .LBB6_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v3, s4 -; GCN1-NEXT: v_mov_b32_e32 v1, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v2, v0 -; GCN1-NEXT: v_mov_b32_e32 v4, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_cbranch_execnz .LBB6_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_ret_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v0, s4 -; GCN2-NEXT: v_mov_b32_e32 v1, s5 -; GCN2-NEXT: global_load_dword v0, v[0:1] -; GCN2-NEXT: s_mov_b64 s[34:35], 0 -; GCN2-NEXT: .LBB6_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v3, s4 -; GCN2-NEXT: 
v_mov_b32_e32 v1, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v2, v0 -; GCN2-NEXT: v_mov_b32_e32 v4, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_cbranch_execnz .LBB6_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_ret_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v0, v[0:1] -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB6_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v3, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v2, v0 -; GCN3-NEXT: v_mov_b32_e32 v4, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB6_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_ret_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1160,87 +677,6 @@ define amdgpu_gfx double @global_atomic_xchg_f64_ret_scalar(ptr addrspace(1) inr } define amdgpu_gfx double @global_atomic_xchg_f64_ret_offset_scalar(ptr addrspace(1) inreg %out, double inreg %in) { -; GCN1-LABEL: 
global_atomic_xchg_f64_ret_offset_scalar: -; GCN1: ; %bb.0: -; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN1-NEXT: s_add_u32 s34, s4, 16 -; GCN1-NEXT: s_addc_u32 s35, s5, 0 -; GCN1-NEXT: v_mov_b32_e32 v0, s34 -; GCN1-NEXT: v_mov_b32_e32 v1, s35 -; GCN1-NEXT: global_load_dword v0, v[0:1] -; GCN1-NEXT: s_mov_b64 s[36:37], 0 -; GCN1-NEXT: .LBB7_1: ; %atomicrmw.start -; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN1-NEXT: v_mov_b32_e32 v3, s34 -; GCN1-NEXT: v_mov_b32_e32 v1, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: v_mov_b32_e32 v2, v0 -; GCN1-NEXT: v_mov_b32_e32 v4, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN1-NEXT: buffer_wbinvl1_vol -; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_cbranch_execnz .LBB7_1 -; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN1-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN1-NEXT: s_setpc_b64 s[30:31] -; -; GCN2-LABEL: global_atomic_xchg_f64_ret_offset_scalar: -; GCN2: ; %bb.0: -; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN2-NEXT: s_add_u32 s34, s4, 16 -; GCN2-NEXT: s_addc_u32 s35, s5, 0 -; GCN2-NEXT: v_mov_b32_e32 v0, s34 -; GCN2-NEXT: v_mov_b32_e32 v1, s35 -; GCN2-NEXT: global_load_dword v0, v[0:1] -; GCN2-NEXT: s_mov_b64 s[36:37], 0 -; GCN2-NEXT: .LBB7_1: ; %atomicrmw.start -; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN2-NEXT: v_mov_b32_e32 v3, s34 -; GCN2-NEXT: v_mov_b32_e32 v1, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: v_mov_b32_e32 v2, v0 -; GCN2-NEXT: v_mov_b32_e32 v4, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN2-NEXT: buffer_wbinvl1_vol -; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37] -; 
GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_cbranch_execnz .LBB7_1 -; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN2-NEXT: s_or_b64 exec, exec, s[36:37] -; GCN2-NEXT: s_setpc_b64 s[30:31] -; -; GCN3-LABEL: global_atomic_xchg_f64_ret_offset_scalar: -; GCN3: ; %bb.0: -; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v0, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s5 -; GCN3-NEXT: global_load_dword v0, v[0:1] offset:16 -; GCN3-NEXT: s_mov_b64 s[34:35], 0 -; GCN3-NEXT: .LBB7_1: ; %atomicrmw.start -; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN3-NEXT: v_mov_b32_e32 v3, s4 -; GCN3-NEXT: v_mov_b32_e32 v1, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: v_mov_b32_e32 v2, v0 -; GCN3-NEXT: v_mov_b32_e32 v4, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] offset:16 glc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN3-NEXT: buffer_wbinvl1_vol -; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2 -; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35] -; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_cbranch_execnz .LBB7_1 -; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end -; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] -; GCN3-NEXT: s_setpc_b64 s[30:31] ; SI-LABEL: global_atomic_xchg_f64_ret_offset_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll new file mode 100644 index 0000000..e050240 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s + +; Check that we perform binary arithmetic in a narrower type where possible, via +; combineBinOpOfZExt or otherwise. 
+ +define <vscale x 8 x i32> @add(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: add: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v8, v12 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %add = add <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %add +} + +define <vscale x 8 x i32> @sub(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: sub: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vwsubu.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v8, v12 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %sub = sub <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %sub +} + +define <vscale x 8 x i32> @mul(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: mul: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v8, v12 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %mul = mul <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %mul +} + +define <vscale x 8 x i32> @sdiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: sdiv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v16, v9 +; CHECK-NEXT: vdivu.vv v8, v12, v16 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %sdiv = sdiv <vscale x 8 x i32> %a.zext, %b.zext + ret 
<vscale x 8 x i32> %sdiv +} + +define <vscale x 8 x i32> @udiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: udiv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v16, v9 +; CHECK-NEXT: vdivu.vv v8, v12, v16 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %udiv = udiv <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %udiv +} + +define <vscale x 8 x i32> @srem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: srem: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v16, v9 +; CHECK-NEXT: vremu.vv v8, v12, v16 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %srem = srem <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %srem +} + +define <vscale x 8 x i32> @urem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: urem: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v16, v9 +; CHECK-NEXT: vremu.vv v8, v12, v16 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %urem = urem <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %urem +} + +define <vscale x 8 x i32> @and(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: and: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vand.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v8, v12 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %shl = and <vscale x 8 x i32> %a.zext, %b.zext + 
ret <vscale x 8 x i32> %shl +} + +define <vscale x 8 x i32> @or(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: or: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vor.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v8, v12 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %or = or <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %or +} + +define <vscale x 8 x i32> @xor(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) { +; CHECK-LABEL: xor: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vxor.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v8, v12 +; CHECK-NEXT: ret + %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32> + %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32> + %xor = xor <vscale x 8 x i32> %a.zext, %b.zext + ret <vscale x 8 x i32> %xor +} diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll new file mode 100644 index 0000000..84936d8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m -mattr=+v -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define <vscale x 1 x i32> @test_vector_std(<vscale x 1 x i32> %va) nounwind { +; SPILL-O2-LABEL: test_vector_std: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; 
SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret <vscale x 1 x i32> %va +} + +define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee(<vscale x 1 x i32> %va) nounwind { +; SPILL-O2-LABEL: test_vector_callee: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; 
SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret <vscale x 1 x i32> %va +} diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index a4aef57..571e2df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1187,3 +1187,19 @@ define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) ret <vscale x 2 x i32> %b } + +; Test widening pseudos with their TIED variant (passthru same as first op). 
+define <vscale x 2 x i64> @vpmerge_vwsub.w_tied(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) { +; CHECK-LABEL: vpmerge_vwsub.w_tied: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vwsub.wv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %vl.zext = zext i32 %vl to i64 + %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %passthru, <vscale x 2 x i32> %y, i64 %vl.zext) + %b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %passthru, i32 %vl) + ret <vscale x 2 x i64> %b +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir index f28311e..f9b175e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode -run-pass arm-mve-vpt-opts %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode -run-pass arm-mve-vpt-opts -verify-machineinstrs %s -o - | FileCheck %s --- name: vcmp_with_opposite_cond @@ -1021,3 +1021,26 @@ body: | %16:mqpr = MVE_VORR %15, %15, 1, %10, $noreg, undef %16 %17:mqpr = MVE_VORR %16, %16, 1, %11, $noreg, undef %17 ... 
+--- +name: reuse_kill_flags +alignment: 4 +body: | + bb.0: + ; CHECK-LABEL: name: reuse_kill_flags + ; CHECK: [[t2MOVi:%[0-9]+]]:tgpreven = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vccr = COPY [[t2MOVi]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mqpr = IMPLICIT_DEF + ; CHECK-NEXT: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR [[DEF]], [[DEF]], 1, [[COPY]], $noreg, undef [[MVE_VORR]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mqpr = IMPLICIT_DEF + ; CHECK-NEXT: [[MVE_VORR1:%[0-9]+]]:mqpr = MVE_VORR [[DEF1]], [[DEF1]], 1, killed [[COPY]], $noreg, undef [[MVE_VORR1]] + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit [[DEF1]] + %0:tgpreven = t2MOVi 0, 14, $noreg, $noreg + %1:vccr = COPY %0:tgpreven + %2:mqpr = IMPLICIT_DEF + %3:mqpr = MVE_VORR %2:mqpr, %2:mqpr, 1, killed %1, $noreg, undef %3 + %4:vccr = COPY %0:tgpreven + %5:mqpr = IMPLICIT_DEF + %6:mqpr = MVE_VORR %5:mqpr, %5:mqpr, 1, killed %4, $noreg, undef %6 + tBX_RET 14 /* CC::al */, $noreg, implicit %5:mqpr + +... diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll index 9bb7fec..7a8ddf5 100644 --- a/llvm/test/CodeGen/X86/combine-pavg.ll +++ b/llvm/test/CodeGen/X86/combine-pavg.ll @@ -80,3 +80,33 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16 %trunc = trunc <16 x i16> %shuffle to <16 x i8> ret <16 x i8> %trunc } + +define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; SSE-LABEL: combine_pavgw_demandedelts: +; SSE: # %bb.0: +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] +; SSE-NEXT: pavgw %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE-NEXT: retq +; +; AVX1-LABEL: combine_pavgw_demandedelts: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] +; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: combine_pavgw_demandedelts: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq + %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> + %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1) + %shuffle = shufflevector <8 x i16> %avg, <8 x i16> poison, <8 x i32> zeroinitializer + ret <8 x i16> %shuffle +} + diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll index e3e1cdc..022b25a 100644 --- a/llvm/test/CodeGen/X86/extractelement-load.ll +++ b/llvm/test/CodeGen/X86/extractelement-load.ll @@ -10,20 +10,13 @@ define i32 @t(ptr %val) nounwind { ; X86-SSE2-LABEL: t: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3] -; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: movl 8(%eax), %eax ; X86-SSE2-NEXT: retl ; -; X64-SSSE3-LABEL: t: -; X64-SSSE3: # %bb.0: -; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3] -; X64-SSSE3-NEXT: movd %xmm0, %eax -; X64-SSSE3-NEXT: retq -; -; X64-AVX-LABEL: t: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl 8(%rdi), %eax -; X64-AVX-NEXT: retq +; X64-LABEL: t: +; X64: # %bb.0: +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: retq %tmp2 = load <2 x i64>, ptr %val, align 16 ; <<2 x i64>> [#uses=1] %tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1] @@ -83,11 +76,9 @@ bb: define i64 @t4(ptr %a) { ; X86-SSE2-LABEL: t4: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqa (%eax), %xmm0 -; X86-SSE2-NEXT: movd %xmm0, %eax -; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X86-SSE2-NEXT: movd %xmm0, %edx +; X86-SSE2-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl (%ecx), %eax +; X86-SSE2-NEXT: movl 4(%ecx), %edx ; X86-SSE2-NEXT: retl ; ; X64-LABEL: t4: @@ -286,35 +277,25 @@ entry: define i32 @PR85419(ptr %p0) { ; X86-SSE2-LABEL: PR85419: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqa (%eax), %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X86-SSE2-NEXT: movd %xmm1, %ecx -; X86-SSE2-NEXT: xorl %edx, %edx -; X86-SSE2-NEXT: orl (%eax), %ecx -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X86-SSE2-NEXT: movd %xmm0, %eax -; X86-SSE2-NEXT: cmovel %edx, %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl (%ecx), %edx +; X86-SSE2-NEXT: xorl %eax, %eax +; X86-SSE2-NEXT: orl 4(%ecx), %edx +; X86-SSE2-NEXT: je .LBB8_2 +; X86-SSE2-NEXT: # %bb.1: +; X86-SSE2-NEXT: movl 8(%ecx), %eax +; X86-SSE2-NEXT: .LBB8_2: ; X86-SSE2-NEXT: retl ; -; X64-SSSE3-LABEL: PR85419: -; X64-SSSE3: # %bb.0: -; X64-SSSE3-NEXT: xorl %ecx, %ecx -; X64-SSSE3-NEXT: cmpq $0, (%rdi) -; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3] -; X64-SSSE3-NEXT: movd %xmm0, %eax -; X64-SSSE3-NEXT: cmovel %ecx, %eax -; X64-SSSE3-NEXT: retq -; -; X64-AVX-LABEL: PR85419: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %eax, %eax -; X64-AVX-NEXT: cmpq $0, (%rdi) -; X64-AVX-NEXT: je .LBB8_2 -; X64-AVX-NEXT: # %bb.1: -; X64-AVX-NEXT: movl 8(%rdi), %eax -; X64-AVX-NEXT: .LBB8_2: -; X64-AVX-NEXT: retq +; X64-LABEL: PR85419: +; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq $0, (%rdi) +; X64-NEXT: je .LBB8_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: .LBB8_2: +; X64-NEXT: retq %load = load <2 x i64>, ptr %p0, align 16 %vecext.i = extractelement <2 x i64> %load, i64 0 %cmp = icmp eq i64 %vecext.i, 0 @@ -443,35 +424,35 @@ define i32 @main() nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp ; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: andl $-32, 
%esp ; X86-SSE2-NEXT: subl $64, %esp -; X86-SSE2-NEXT: movdqa zero, %xmm0 -; X86-SSE2-NEXT: movaps n1+16, %xmm1 -; X86-SSE2-NEXT: movaps n1, %xmm2 -; X86-SSE2-NEXT: movaps %xmm2, zero -; X86-SSE2-NEXT: movaps %xmm1, zero+16 -; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2] -; X86-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movaps %xmm1, (%esp) -; X86-SSE2-NEXT: movdqa (%esp), %xmm1 -; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X86-SSE2-NEXT: movd %xmm2, %eax -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X86-SSE2-NEXT: movd %xmm2, %ecx +; X86-SSE2-NEXT: movaps n1+16, %xmm0 +; X86-SSE2-NEXT: movaps n1, %xmm1 +; X86-SSE2-NEXT: movl zero+4, %ecx +; X86-SSE2-NEXT: movl zero+8, %eax +; X86-SSE2-NEXT: movaps %xmm1, zero +; X86-SSE2-NEXT: movaps %xmm0, zero+16 +; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2] +; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movaps %xmm0, (%esp) +; X86-SSE2-NEXT: movdqa (%esp), %xmm0 +; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X86-SSE2-NEXT: movd %xmm1, %esi ; X86-SSE2-NEXT: xorl %edx, %edx -; X86-SSE2-NEXT: divl %ecx -; X86-SSE2-NEXT: movl %eax, %ecx +; X86-SSE2-NEXT: divl %esi +; X86-SSE2-NEXT: movl %eax, %esi ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X86-SSE2-NEXT: movd %xmm0, %eax -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; X86-SSE2-NEXT: movd %xmm0, %esi +; X86-SSE2-NEXT: movd %xmm0, %edi +; X86-SSE2-NEXT: movl %ecx, %eax ; X86-SSE2-NEXT: xorl %edx, %edx -; X86-SSE2-NEXT: divl %esi -; X86-SSE2-NEXT: addl %ecx, %eax -; X86-SSE2-NEXT: leal -4(%ebp), %esp +; X86-SSE2-NEXT: divl %edi +; X86-SSE2-NEXT: addl %esi, %eax +; X86-SSE2-NEXT: leal -8(%ebp), %esp ; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %edi ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl ; @@ -481,31 +462,29 @@ define i32 @main() nounwind { ; X64-SSSE3-NEXT: movq %rsp, %rbp 
; X64-SSSE3-NEXT: andq $-32, %rsp ; X64-SSSE3-NEXT: subq $64, %rsp -; X64-SSSE3-NEXT: movdqa zero(%rip), %xmm0 ; X64-SSSE3-NEXT: movq n1@GOTPCREL(%rip), %rax -; X64-SSSE3-NEXT: movaps (%rax), %xmm1 -; X64-SSSE3-NEXT: movaps 16(%rax), %xmm2 -; X64-SSSE3-NEXT: movaps %xmm1, zero(%rip) -; X64-SSSE3-NEXT: movaps %xmm2, zero+16(%rip) -; X64-SSSE3-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2] -; X64-SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: movaps %xmm1, (%rsp) -; X64-SSSE3-NEXT: movdqa (%rsp), %xmm1 -; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 -; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-SSSE3-NEXT: movd %xmm2, %eax -; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-SSSE3-NEXT: movd %xmm2, %ecx +; X64-SSSE3-NEXT: movaps (%rax), %xmm0 +; X64-SSSE3-NEXT: movaps 16(%rax), %xmm1 +; X64-SSSE3-NEXT: movl zero+4(%rip), %ecx +; X64-SSSE3-NEXT: movl zero+8(%rip), %eax +; X64-SSSE3-NEXT: movaps %xmm0, zero(%rip) +; X64-SSSE3-NEXT: movaps %xmm1, zero+16(%rip) +; X64-SSSE3-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2] +; X64-SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; X64-SSSE3-NEXT: movaps %xmm0, (%rsp) +; X64-SSSE3-NEXT: movdqa (%rsp), %xmm0 +; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-SSSE3-NEXT: movd %xmm1, %esi ; X64-SSSE3-NEXT: xorl %edx, %edx -; X64-SSSE3-NEXT: divl %ecx -; X64-SSSE3-NEXT: movl %eax, %ecx +; X64-SSSE3-NEXT: divl %esi +; X64-SSSE3-NEXT: movl %eax, %esi ; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X64-SSSE3-NEXT: movd %xmm0, %eax -; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; X64-SSSE3-NEXT: movd %xmm0, %esi +; X64-SSSE3-NEXT: movd %xmm0, %edi +; X64-SSSE3-NEXT: movl %ecx, %eax ; X64-SSSE3-NEXT: xorl %edx, %edx -; X64-SSSE3-NEXT: divl %esi -; X64-SSSE3-NEXT: addl %ecx, %eax +; X64-SSSE3-NEXT: divl %edi +; X64-SSSE3-NEXT: addl %esi, %eax ; X64-SSSE3-NEXT: movq %rbp, %rsp ; X64-SSSE3-NEXT: popq %rbp ; X64-SSSE3-NEXT: retq diff 
--git a/llvm/test/CodeGen/X86/huge-stack-offset.ll b/llvm/test/CodeGen/X86/huge-stack-offset.ll index 68dcfa7..e825328 100644 --- a/llvm/test/CodeGen/X86/huge-stack-offset.ll +++ b/llvm/test/CodeGen/X86/huge-stack-offset.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux-unknown | FileCheck %s --check-prefix=CHECK-64 -; RUN: llc < %s -mtriple=i386-linux-unknown | FileCheck %s --check-prefix=CHECK-32 +; RUN: llc < %s -mtriple=x86_64-linux-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=i386-linux-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-32 ; Test that a large stack offset uses a single add/sub instruction to ; adjust the stack pointer. diff --git a/llvm/test/CodeGen/X86/huge-stack-offset2.ll b/llvm/test/CodeGen/X86/huge-stack-offset2.ll index 3bf0260..053643eb 100644 --- a/llvm/test/CodeGen/X86/huge-stack-offset2.ll +++ b/llvm/test/CodeGen/X86/huge-stack-offset2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=CHECK ; Test how we handle pathologically large stack frames when RAX is live through ; the prologue and epilogue. 
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll index 898b34e..6aa0a81 100644 --- a/llvm/test/CodeGen/X86/masked_store.ll +++ b/llvm/test/CodeGen/X86/masked_store.ll @@ -12,7 +12,7 @@ ; vXf64 ; -define void @store_v1f64_v1i64(<1 x i64> %trigger, ptr %addr, <1 x double> %val) { +define void @store_v1f64_v1i64(<1 x i64> %trigger, ptr %addr, <1 x double> %val) nounwind { ; SSE-LABEL: store_v1f64_v1i64: ; SSE: ## %bb.0: ; SSE-NEXT: testq %rdi, %rdi @@ -46,7 +46,7 @@ define void @store_v1f64_v1i64(<1 x i64> %trigger, ptr %addr, <1 x double> %val) ret void } -define void @store_v2f64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x double> %val) { +define void @store_v2f64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x double> %val) nounwind { ; SSE-LABEL: store_v2f64_v2i64: ; SSE: ## %bb.0: ; SSE-NEXT: movmskpd %xmm0, %eax @@ -106,7 +106,7 @@ define void @store_v2f64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x double> %val) ret void } -define void @store_v4f64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x double> %val) { +define void @store_v4f64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x double> %val) nounwind { ; SSE2-LABEL: store_v4f64_v4i64: ; SSE2: ## %bb.0: ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] @@ -222,7 +222,7 @@ define void @store_v4f64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x double> %val) ; vXf32 ; -define void @store_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float> %val) { +define void @store_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float> %val) nounwind { ; SSE2-LABEL: store_v2f32_v2i32: ; SSE2: ## %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] @@ -314,7 +314,7 @@ define void @store_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float> %val) ret void } -define void @store_v4f32_v4i32(<4 x float> %x, ptr %ptr, <4 x float> %y, <4 x i32> %mask) { +define void @store_v4f32_v4i32(<4 x float> %x, ptr %ptr, <4 x float> %y, <4 x i32> %mask) nounwind { ; SSE2-LABEL: store_v4f32_v4i32: ; SSE2: ## 
%bb.0: ; SSE2-NEXT: movmskps %xmm2, %eax @@ -425,7 +425,7 @@ define void @store_v4f32_v4i32(<4 x float> %x, ptr %ptr, <4 x float> %y, <4 x i3 ret void } -define void @store_v8f32_v8i32(<8 x float> %x, ptr %ptr, <8 x float> %y, <8 x i32> %mask) { +define void @store_v8f32_v8i32(<8 x float> %x, ptr %ptr, <8 x float> %y, <8 x i32> %mask) nounwind { ; SSE2-LABEL: store_v8f32_v8i32: ; SSE2: ## %bb.0: ; SSE2-NEXT: packssdw %xmm5, %xmm4 @@ -605,7 +605,7 @@ define void @store_v8f32_v8i32(<8 x float> %x, ptr %ptr, <8 x float> %y, <8 x i3 ret void } -define void @store_v16f32_v16i32(<16 x float> %x, ptr %ptr, <16 x float> %y, <16 x i32> %mask) { +define void @store_v16f32_v16i32(<16 x float> %x, ptr %ptr, <16 x float> %y, <16 x i32> %mask) nounwind { ; SSE2-LABEL: store_v16f32_v16i32: ; SSE2: ## %bb.0: ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm4 @@ -914,7 +914,7 @@ define void @store_v16f32_v16i32(<16 x float> %x, ptr %ptr, <16 x float> %y, <16 ; vXi64 ; -define void @store_v2i64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x i64> %val) { +define void @store_v2i64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x i64> %val) nounwind { ; SSE2-LABEL: store_v2i64_v2i64: ; SSE2: ## %bb.0: ; SSE2-NEXT: movmskpd %xmm0, %eax @@ -998,7 +998,7 @@ define void @store_v2i64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x i64> %val) { ret void } -define void @store_v4i64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x i64> %val) { +define void @store_v4i64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x i64> %val) nounwind { ; SSE2-LABEL: store_v4i64_v4i64: ; SSE2: ## %bb.0: ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] @@ -1122,7 +1122,7 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x i64> %val) { ; vXi32 ; -define void @store_v1i32_v1i32(<1 x i32> %trigger, ptr %addr, <1 x i32> %val) { +define void @store_v1i32_v1i32(<1 x i32> %trigger, ptr %addr, <1 x i32> %val) nounwind { ; SSE-LABEL: store_v1i32_v1i32: ; SSE: ## %bb.0: ; SSE-NEXT: testl %edi, %edi @@ -1156,7 +1156,7 @@ define void 
@store_v1i32_v1i32(<1 x i32> %trigger, ptr %addr, <1 x i32> %val) { ret void } -define void @store_v2i32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { +define void @store_v2i32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) nounwind { ; SSE2-LABEL: store_v2i32_v2i32: ; SSE2: ## %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] @@ -1256,7 +1256,7 @@ define void @store_v2i32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { ret void } -define void @store_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { +define void @store_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) nounwind { ; SSE2-LABEL: store_v4i32_v4i32: ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 @@ -1370,7 +1370,7 @@ define void @store_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { ret void } -define void @store_v8i32_v8i32(<8 x i32> %trigger, ptr %addr, <8 x i32> %val) { +define void @store_v8i32_v8i32(<8 x i32> %trigger, ptr %addr, <8 x i32> %val) nounwind { ; SSE2-LABEL: store_v8i32_v8i32: ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 @@ -1560,7 +1560,7 @@ define void @store_v8i32_v8i32(<8 x i32> %trigger, ptr %addr, <8 x i32> %val) { ; vXi16 ; -define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) { +define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) nounwind { ; SSE2-LABEL: store_v8i16_v8i16: ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 @@ -1907,7 +1907,7 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) { ret void } -define void @store_v16i16_v16i16(<16 x i16> %trigger, ptr %addr, <16 x i16> %val) { +define void @store_v16i16_v16i16(<16 x i16> %trigger, ptr %addr, <16 x i16> %val) nounwind { ; SSE2-LABEL: store_v16i16_v16i16: ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 @@ -2676,7 +2676,7 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, ptr %addr, <16 x i16> %val ; vXi8 ; -define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, 
<16 x i8> %val) { +define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) nounwind { ; SSE2-LABEL: store_v16i8_v16i8: ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 @@ -3273,7 +3273,7 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) { ret void } -define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) { +define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) nounwind { ; SSE2-LABEL: store_v32i8_v32i8: ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 @@ -4670,7 +4670,7 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) { ;;; Stores with Constant Masks -define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { +define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) nounwind { ; SSE-LABEL: mstore_constmask_v4i32_v4i32: ; SSE: ## %bb.0: ; SSE-NEXT: movups %xmm1, (%rdi) @@ -4693,7 +4693,7 @@ define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i3 ; Make sure we are able to detect all ones constant mask after type legalization ; to avoid masked stores. -define void @mstore_constmask_allones_split(<16 x i64> %trigger, ptr %addr, <16 x i64> %val) { +define void @mstore_constmask_allones_split(<16 x i64> %trigger, ptr %addr, <16 x i64> %val) nounwind { ; SSE2-LABEL: mstore_constmask_allones_split: ; SSE2: ## %bb.0: ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0 @@ -4810,7 +4810,7 @@ define void @mstore_constmask_allones_split(<16 x i64> %trigger, ptr %addr, <16 ; When only one element of the mask is set, reduce to a scalar store. 
-define void @one_mask_bit_set1(ptr %addr, <4 x i32> %val) { +define void @one_mask_bit_set1(ptr %addr, <4 x i32> %val) nounwind { ; SSE-LABEL: one_mask_bit_set1: ; SSE: ## %bb.0: ; SSE-NEXT: movss %xmm0, (%rdi) @@ -4832,7 +4832,7 @@ define void @one_mask_bit_set1(ptr %addr, <4 x i32> %val) { ; Choose a different element to show that the correct address offset is produced. -define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) { +define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) nounwind { ; SSE2-LABEL: one_mask_bit_set2: ; SSE2: ## %bb.0: ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] @@ -4860,7 +4860,7 @@ define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) { ; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly. -define void @one_mask_bit_set3(ptr %addr, <4 x i64> %val) { +define void @one_mask_bit_set3(ptr %addr, <4 x i64> %val) nounwind { ; SSE-LABEL: one_mask_bit_set3: ; SSE: ## %bb.0: ; SSE-NEXT: movlps %xmm1, 16(%rdi) @@ -4886,7 +4886,7 @@ define void @one_mask_bit_set3(ptr %addr, <4 x i64> %val) { ; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly. -define void @one_mask_bit_set4(ptr %addr, <4 x double> %val) { +define void @one_mask_bit_set4(ptr %addr, <4 x double> %val) nounwind { ; SSE-LABEL: one_mask_bit_set4: ; SSE: ## %bb.0: ; SSE-NEXT: movhps %xmm1, 24(%rdi) @@ -4912,7 +4912,7 @@ define void @one_mask_bit_set4(ptr %addr, <4 x double> %val) { ; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected. 
-define void @one_mask_bit_set5(ptr %addr, <8 x double> %val) { +define void @one_mask_bit_set5(ptr %addr, <8 x double> %val) nounwind { ; SSE-LABEL: one_mask_bit_set5: ; SSE: ## %bb.0: ; SSE-NEXT: movlps %xmm3, 48(%rdi) @@ -4944,7 +4944,7 @@ define void @one_mask_bit_set5(ptr %addr, <8 x double> %val) { } ; Try one elt in each half of a vector that needs to split -define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) { +define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) nounwind { ; SSE2-LABEL: one_mask_bit_set6: ; SSE2: ## %bb.0: ; SSE2-NEXT: movlps %xmm3, 48(%rdi) @@ -4999,7 +4999,7 @@ define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) { ret void } -define void @top_bits_unset_stack() { +define void @top_bits_unset_stack() nounwind { ; SSE-LABEL: top_bits_unset_stack: ; SSE: ## %bb.0: ## %entry ; SSE-NEXT: xorps %xmm0, %xmm0 @@ -5047,7 +5047,6 @@ define void @top_bits_unset_stack() { ; X86-AVX512-LABEL: top_bits_unset_stack: ; X86-AVX512: ## %bb.0: ## %entry ; X86-AVX512-NEXT: subl $76, %esp -; X86-AVX512-NEXT: .cfi_def_cfa_offset 80 ; X86-AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; X86-AVX512-NEXT: movb $63, %al ; X86-AVX512-NEXT: kmovd %eax, %k1 @@ -5064,7 +5063,7 @@ entry: ; SimplifyDemandedBits eliminates an ashr here. 
-define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, ptr %p, <4 x i32> %masksrc) { +define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, ptr %p, <4 x i32> %masksrc) nounwind { ; SSE-LABEL: masked_store_bool_mask_demand_trunc_sext: ; SSE: ## %bb.0: ; SSE-NEXT: pslld $31, %xmm2 @@ -5160,7 +5159,7 @@ define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, ptr %p, < ; PR26697 -define void @one_mask_bit_set1_variable(ptr %addr, <4 x float> %val, <4 x i32> %mask) { +define void @one_mask_bit_set1_variable(ptr %addr, <4 x float> %val, <4 x i32> %mask) nounwind { ; SSE2-LABEL: one_mask_bit_set1_variable: ; SSE2: ## %bb.0: ; SSE2-NEXT: movmskps %xmm1, %eax @@ -5267,7 +5266,7 @@ define void @one_mask_bit_set1_variable(ptr %addr, <4 x float> %val, <4 x i32> % ; This needs to be widened to v4i32. ; This used to assert in type legalization. PR38436 ; FIXME: The codegen for AVX512 should use KSHIFT to zero the upper bits of the mask. -define void @widen_masked_store(<3 x i32> %v, ptr %p, <3 x i1> %mask) { +define void @widen_masked_store(<3 x i32> %v, ptr %p, <3 x i1> %mask) nounwind { ; SSE2-LABEL: widen_masked_store: ; SSE2: ## %bb.0: ; SSE2-NEXT: andb $1, %sil @@ -5448,7 +5447,7 @@ define void @widen_masked_store(<3 x i32> %v, ptr %p, <3 x i1> %mask) { ret void } -define void @zero_mask(ptr %addr, <2 x double> %val) { +define void @zero_mask(ptr %addr, <2 x double> %val) nounwind { ; SSE-LABEL: zero_mask: ; SSE: ## %bb.0: ; SSE-NEXT: retq @@ -5464,7 +5463,7 @@ define void @zero_mask(ptr %addr, <2 x double> %val) { ret void } -define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask) { +define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask) nounwind { ; SSE2-LABEL: PR11210: ; SSE2: ## %bb.0: ; SSE2-NEXT: movmskps %xmm2, %eax @@ -5638,492 +5637,248 @@ define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask) ret void } -define void 
@store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigger.ptr, ptr %val.ptr, ptr %dst) { -; SSE2-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts: -; SSE2: ## %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm6 -; SSE2-NEXT: movdqa 32(%rdi), %xmm7 -; SSE2-NEXT: movdqa 64(%rdi), %xmm8 -; SSE2-NEXT: movl 80(%rsi), %eax -; SSE2-NEXT: movl 64(%rsi), %r8d -; SSE2-NEXT: movl 48(%rsi), %r9d -; SSE2-NEXT: movl 32(%rsi), %r10d -; SSE2-NEXT: movl 16(%rsi), %r11d -; SSE2-NEXT: movdqa 80(%rsi), %xmm0 -; SSE2-NEXT: movdqa 64(%rsi), %xmm1 -; SSE2-NEXT: movdqa 48(%rsi), %xmm2 -; SSE2-NEXT: movdqa 32(%rsi), %xmm3 -; SSE2-NEXT: movdqa 16(%rsi), %xmm4 -; SSE2-NEXT: movdqa (%rsi), %xmm5 -; SSE2-NEXT: packssdw 48(%rdi), %xmm7 -; SSE2-NEXT: packssdw 16(%rdi), %xmm6 -; SSE2-NEXT: packsswb %xmm7, %xmm6 -; SSE2-NEXT: packssdw 80(%rdi), %xmm8 -; SSE2-NEXT: packsswb %xmm8, %xmm8 -; SSE2-NEXT: pmovmskb %xmm6, %edi -; SSE2-NEXT: andl $21845, %edi ## imm = 0x5555 -; SSE2-NEXT: pmovmskb %xmm8, %ecx -; SSE2-NEXT: andl $85, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %edi, %ecx -; SSE2-NEXT: testb $1, %cl -; SSE2-NEXT: jne LBB31_1 -; SSE2-NEXT: ## %bb.2: ## %else -; SSE2-NEXT: testb $2, %cl -; SSE2-NEXT: jne LBB31_3 -; SSE2-NEXT: LBB31_4: ## %else2 -; SSE2-NEXT: testb $4, %cl -; SSE2-NEXT: jne LBB31_5 -; SSE2-NEXT: LBB31_6: ## %else4 -; SSE2-NEXT: testb $8, %cl -; SSE2-NEXT: jne LBB31_7 -; SSE2-NEXT: LBB31_8: ## %else6 -; SSE2-NEXT: testb $16, %cl -; SSE2-NEXT: jne LBB31_9 -; SSE2-NEXT: LBB31_10: ## %else8 -; SSE2-NEXT: testb $32, %cl -; SSE2-NEXT: jne LBB31_11 -; SSE2-NEXT: LBB31_12: ## %else10 -; SSE2-NEXT: testb $64, %cl -; SSE2-NEXT: jne LBB31_13 -; SSE2-NEXT: LBB31_14: ## %else12 -; SSE2-NEXT: testb %cl, %cl -; SSE2-NEXT: js LBB31_15 -; SSE2-NEXT: LBB31_16: ## %else14 -; SSE2-NEXT: testl $256, %ecx ## imm = 0x100 -; SSE2-NEXT: jne LBB31_17 -; SSE2-NEXT: LBB31_18: ## %else16 -; SSE2-NEXT: testl $512, %ecx ## imm = 0x200 -; SSE2-NEXT: jne LBB31_19 -; SSE2-NEXT: 
LBB31_20: ## %else18 -; SSE2-NEXT: testl $1024, %ecx ## imm = 0x400 -; SSE2-NEXT: jne LBB31_21 -; SSE2-NEXT: LBB31_22: ## %else20 -; SSE2-NEXT: testl $2048, %ecx ## imm = 0x800 -; SSE2-NEXT: jne LBB31_23 -; SSE2-NEXT: LBB31_24: ## %else22 -; SSE2-NEXT: testl $4096, %ecx ## imm = 0x1000 -; SSE2-NEXT: jne LBB31_25 -; SSE2-NEXT: LBB31_26: ## %else24 -; SSE2-NEXT: testl $8192, %ecx ## imm = 0x2000 -; SSE2-NEXT: jne LBB31_27 -; SSE2-NEXT: LBB31_28: ## %else26 -; SSE2-NEXT: testl $16384, %ecx ## imm = 0x4000 -; SSE2-NEXT: jne LBB31_29 -; SSE2-NEXT: LBB31_30: ## %else28 -; SSE2-NEXT: testw %cx, %cx -; SSE2-NEXT: js LBB31_31 -; SSE2-NEXT: LBB31_32: ## %else30 -; SSE2-NEXT: testl $65536, %ecx ## imm = 0x10000 -; SSE2-NEXT: jne LBB31_33 -; SSE2-NEXT: LBB31_34: ## %else32 -; SSE2-NEXT: testl $131072, %ecx ## imm = 0x20000 -; SSE2-NEXT: jne LBB31_35 -; SSE2-NEXT: LBB31_36: ## %else34 -; SSE2-NEXT: testl $262144, %ecx ## imm = 0x40000 -; SSE2-NEXT: jne LBB31_37 -; SSE2-NEXT: LBB31_38: ## %else36 -; SSE2-NEXT: testl $524288, %ecx ## imm = 0x80000 -; SSE2-NEXT: jne LBB31_39 -; SSE2-NEXT: LBB31_40: ## %else38 -; SSE2-NEXT: testl $1048576, %ecx ## imm = 0x100000 -; SSE2-NEXT: jne LBB31_41 -; SSE2-NEXT: LBB31_42: ## %else40 -; SSE2-NEXT: testl $2097152, %ecx ## imm = 0x200000 -; SSE2-NEXT: jne LBB31_43 -; SSE2-NEXT: LBB31_44: ## %else42 -; SSE2-NEXT: testl $4194304, %ecx ## imm = 0x400000 -; SSE2-NEXT: je LBB31_46 -; SSE2-NEXT: LBB31_45: ## %cond.store43 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: movl %eax, 88(%rdx) -; SSE2-NEXT: LBB31_46: ## %else44 -; SSE2-NEXT: movb $1, %al -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: jne LBB31_48 -; SSE2-NEXT: ## %bb.47: ## %cond.store45 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movl %eax, 92(%rdx) -; SSE2-NEXT: LBB31_48: ## %else46 -; SSE2-NEXT: retq -; SSE2-NEXT: LBB31_1: ## %cond.store -; SSE2-NEXT: movl (%rsi), %esi -; SSE2-NEXT: movl 
%esi, (%rdx) -; SSE2-NEXT: testb $2, %cl -; SSE2-NEXT: je LBB31_4 -; SSE2-NEXT: LBB31_3: ## %cond.store1 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,1,1] -; SSE2-NEXT: movd %xmm6, %esi -; SSE2-NEXT: movl %esi, 4(%rdx) -; SSE2-NEXT: testb $4, %cl -; SSE2-NEXT: je LBB31_6 -; SSE2-NEXT: LBB31_5: ## %cond.store3 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3] -; SSE2-NEXT: movd %xmm6, %esi -; SSE2-NEXT: movl %esi, 8(%rdx) -; SSE2-NEXT: testb $8, %cl -; SSE2-NEXT: je LBB31_8 -; SSE2-NEXT: LBB31_7: ## %cond.store5 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[3,3,3,3] -; SSE2-NEXT: movd %xmm5, %esi -; SSE2-NEXT: movl %esi, 12(%rdx) -; SSE2-NEXT: testb $16, %cl -; SSE2-NEXT: je LBB31_10 -; SSE2-NEXT: LBB31_9: ## %cond.store7 -; SSE2-NEXT: movl %r11d, 16(%rdx) -; SSE2-NEXT: testb $32, %cl -; SSE2-NEXT: je LBB31_12 -; SSE2-NEXT: LBB31_11: ## %cond.store9 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,1,1] -; SSE2-NEXT: movd %xmm5, %esi -; SSE2-NEXT: movl %esi, 20(%rdx) -; SSE2-NEXT: testb $64, %cl -; SSE2-NEXT: je LBB31_14 -; SSE2-NEXT: LBB31_13: ## %cond.store11 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3] -; SSE2-NEXT: movd %xmm5, %esi -; SSE2-NEXT: movl %esi, 24(%rdx) -; SSE2-NEXT: testb %cl, %cl -; SSE2-NEXT: jns LBB31_16 -; SSE2-NEXT: LBB31_15: ## %cond.store13 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[3,3,3,3] -; SSE2-NEXT: movd %xmm4, %esi -; SSE2-NEXT: movl %esi, 28(%rdx) -; SSE2-NEXT: testl $256, %ecx ## imm = 0x100 -; SSE2-NEXT: je LBB31_18 -; SSE2-NEXT: LBB31_17: ## %cond.store15 -; SSE2-NEXT: movl %r10d, 32(%rdx) -; SSE2-NEXT: testl $512, %ecx ## imm = 0x200 -; SSE2-NEXT: je LBB31_20 -; SSE2-NEXT: LBB31_19: ## %cond.store17 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,1,1] -; SSE2-NEXT: movd %xmm4, %esi -; SSE2-NEXT: movl %esi, 36(%rdx) -; SSE2-NEXT: testl $1024, %ecx ## imm = 0x400 -; SSE2-NEXT: je LBB31_22 -; SSE2-NEXT: LBB31_21: ## %cond.store19 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] -; SSE2-NEXT: movd %xmm4, %esi -; SSE2-NEXT: movl %esi, 
40(%rdx) -; SSE2-NEXT: testl $2048, %ecx ## imm = 0x800 -; SSE2-NEXT: je LBB31_24 -; SSE2-NEXT: LBB31_23: ## %cond.store21 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] -; SSE2-NEXT: movd %xmm3, %esi -; SSE2-NEXT: movl %esi, 44(%rdx) -; SSE2-NEXT: testl $4096, %ecx ## imm = 0x1000 -; SSE2-NEXT: je LBB31_26 -; SSE2-NEXT: LBB31_25: ## %cond.store23 -; SSE2-NEXT: movl %r9d, 48(%rdx) -; SSE2-NEXT: testl $8192, %ecx ## imm = 0x2000 -; SSE2-NEXT: je LBB31_28 -; SSE2-NEXT: LBB31_27: ## %cond.store25 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,1,1] -; SSE2-NEXT: movd %xmm3, %esi -; SSE2-NEXT: movl %esi, 52(%rdx) -; SSE2-NEXT: testl $16384, %ecx ## imm = 0x4000 -; SSE2-NEXT: je LBB31_30 -; SSE2-NEXT: LBB31_29: ## %cond.store27 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] -; SSE2-NEXT: movd %xmm3, %esi -; SSE2-NEXT: movl %esi, 56(%rdx) -; SSE2-NEXT: testw %cx, %cx -; SSE2-NEXT: jns LBB31_32 -; SSE2-NEXT: LBB31_31: ## %cond.store29 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3] -; SSE2-NEXT: movd %xmm2, %esi -; SSE2-NEXT: movl %esi, 60(%rdx) -; SSE2-NEXT: testl $65536, %ecx ## imm = 0x10000 -; SSE2-NEXT: je LBB31_34 -; SSE2-NEXT: LBB31_33: ## %cond.store31 -; SSE2-NEXT: movl %r8d, 64(%rdx) -; SSE2-NEXT: testl $131072, %ecx ## imm = 0x20000 -; SSE2-NEXT: je LBB31_36 -; SSE2-NEXT: LBB31_35: ## %cond.store33 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; SSE2-NEXT: movd %xmm2, %esi -; SSE2-NEXT: movl %esi, 68(%rdx) -; SSE2-NEXT: testl $262144, %ecx ## imm = 0x40000 -; SSE2-NEXT: je LBB31_38 -; SSE2-NEXT: LBB31_37: ## %cond.store35 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; SSE2-NEXT: movd %xmm2, %esi -; SSE2-NEXT: movl %esi, 72(%rdx) -; SSE2-NEXT: testl $524288, %ecx ## imm = 0x80000 -; SSE2-NEXT: je LBB31_40 -; SSE2-NEXT: LBB31_39: ## %cond.store37 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; SSE2-NEXT: movd %xmm1, %esi -; SSE2-NEXT: movl %esi, 76(%rdx) -; SSE2-NEXT: testl $1048576, %ecx ## imm = 0x100000 -; SSE2-NEXT: je LBB31_42 -; 
SSE2-NEXT: LBB31_41: ## %cond.store39 -; SSE2-NEXT: movl %eax, 80(%rdx) -; SSE2-NEXT: testl $2097152, %ecx ## imm = 0x200000 -; SSE2-NEXT: je LBB31_44 -; SSE2-NEXT: LBB31_43: ## %cond.store41 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: movl %eax, 84(%rdx) -; SSE2-NEXT: testl $4194304, %ecx ## imm = 0x400000 -; SSE2-NEXT: jne LBB31_45 -; SSE2-NEXT: jmp LBB31_46 -; -; SSE4-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts: -; SSE4: ## %bb.0: -; SSE4-NEXT: pushq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 16 -; SSE4-NEXT: pushq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 24 -; SSE4-NEXT: pushq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 32 -; SSE4-NEXT: pushq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 40 -; SSE4-NEXT: pushq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 48 -; SSE4-NEXT: pushq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 56 -; SSE4-NEXT: .cfi_offset %rbx, -56 -; SSE4-NEXT: .cfi_offset %r12, -48 -; SSE4-NEXT: .cfi_offset %r13, -40 -; SSE4-NEXT: .cfi_offset %r14, -32 -; SSE4-NEXT: .cfi_offset %r15, -24 -; SSE4-NEXT: .cfi_offset %rbp, -16 -; SSE4-NEXT: movdqa (%rdi), %xmm1 -; SSE4-NEXT: movdqa 32(%rdi), %xmm2 -; SSE4-NEXT: movdqa 64(%rdi), %xmm0 -; SSE4-NEXT: movl 92(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 88(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 84(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 80(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 76(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 72(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 68(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 64(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte 
Spill -; SSE4-NEXT: movl 60(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 56(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: movl 52(%rsi), %eax -; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; SSE4-NEXT: packssdw 48(%rdi), %xmm2 -; SSE4-NEXT: packssdw 16(%rdi), %xmm1 -; SSE4-NEXT: packsswb %xmm2, %xmm1 -; SSE4-NEXT: packssdw 80(%rdi), %xmm0 -; SSE4-NEXT: packsswb %xmm0, %xmm0 -; SSE4-NEXT: pmovmskb %xmm1, %eax -; SSE4-NEXT: andl $21845, %eax ## imm = 0x5555 -; SSE4-NEXT: pmovmskb %xmm0, %edi -; SSE4-NEXT: andl $85, %edi -; SSE4-NEXT: shll $16, %edi -; SSE4-NEXT: orl %eax, %edi -; SSE4-NEXT: movl 48(%rsi), %r13d -; SSE4-NEXT: testb $1, %dil -; SSE4-NEXT: movl 44(%rsi), %eax -; SSE4-NEXT: movl 40(%rsi), %ecx -; SSE4-NEXT: movl 36(%rsi), %r8d -; SSE4-NEXT: movl 32(%rsi), %r9d -; SSE4-NEXT: movl 28(%rsi), %r10d -; SSE4-NEXT: movl 24(%rsi), %r11d -; SSE4-NEXT: movl 20(%rsi), %ebx -; SSE4-NEXT: movl 16(%rsi), %ebp -; SSE4-NEXT: movl 12(%rsi), %r14d -; SSE4-NEXT: movl 8(%rsi), %r15d -; SSE4-NEXT: movl 4(%rsi), %r12d -; SSE4-NEXT: jne LBB31_1 -; SSE4-NEXT: ## %bb.2: ## %else -; SSE4-NEXT: testb $2, %dil -; SSE4-NEXT: jne LBB31_3 -; SSE4-NEXT: LBB31_4: ## %else2 -; SSE4-NEXT: testb $4, %dil -; SSE4-NEXT: jne LBB31_5 -; SSE4-NEXT: LBB31_6: ## %else4 -; SSE4-NEXT: testb $8, %dil -; SSE4-NEXT: jne LBB31_7 -; SSE4-NEXT: LBB31_8: ## %else6 -; SSE4-NEXT: testb $16, %dil -; SSE4-NEXT: jne LBB31_9 -; SSE4-NEXT: LBB31_10: ## %else8 -; SSE4-NEXT: testb $32, %dil -; SSE4-NEXT: jne LBB31_11 -; SSE4-NEXT: LBB31_12: ## %else10 -; SSE4-NEXT: testb $64, %dil -; SSE4-NEXT: jne LBB31_13 -; SSE4-NEXT: LBB31_14: ## %else12 -; SSE4-NEXT: testb %dil, %dil -; SSE4-NEXT: js LBB31_15 -; SSE4-NEXT: LBB31_16: ## %else14 -; SSE4-NEXT: testl $256, %edi ## imm = 0x100 -; SSE4-NEXT: jne LBB31_17 -; SSE4-NEXT: LBB31_18: ## %else16 -; SSE4-NEXT: testl $512, %edi ## imm = 0x200 -; SSE4-NEXT: 
jne LBB31_19 -; SSE4-NEXT: LBB31_20: ## %else18 -; SSE4-NEXT: testl $1024, %edi ## imm = 0x400 -; SSE4-NEXT: jne LBB31_21 -; SSE4-NEXT: LBB31_22: ## %else20 -; SSE4-NEXT: testl $2048, %edi ## imm = 0x800 -; SSE4-NEXT: jne LBB31_23 -; SSE4-NEXT: LBB31_24: ## %else22 -; SSE4-NEXT: testl $4096, %edi ## imm = 0x1000 -; SSE4-NEXT: jne LBB31_25 -; SSE4-NEXT: LBB31_26: ## %else24 -; SSE4-NEXT: testl $8192, %edi ## imm = 0x2000 -; SSE4-NEXT: jne LBB31_27 -; SSE4-NEXT: LBB31_28: ## %else26 -; SSE4-NEXT: testl $16384, %edi ## imm = 0x4000 -; SSE4-NEXT: jne LBB31_29 -; SSE4-NEXT: LBB31_30: ## %else28 -; SSE4-NEXT: testw %di, %di -; SSE4-NEXT: js LBB31_31 -; SSE4-NEXT: LBB31_32: ## %else30 -; SSE4-NEXT: testl $65536, %edi ## imm = 0x10000 -; SSE4-NEXT: jne LBB31_33 -; SSE4-NEXT: LBB31_34: ## %else32 -; SSE4-NEXT: testl $131072, %edi ## imm = 0x20000 -; SSE4-NEXT: jne LBB31_35 -; SSE4-NEXT: LBB31_36: ## %else34 -; SSE4-NEXT: testl $262144, %edi ## imm = 0x40000 -; SSE4-NEXT: jne LBB31_37 -; SSE4-NEXT: LBB31_38: ## %else36 -; SSE4-NEXT: testl $524288, %edi ## imm = 0x80000 -; SSE4-NEXT: jne LBB31_39 -; SSE4-NEXT: LBB31_40: ## %else38 -; SSE4-NEXT: testl $1048576, %edi ## imm = 0x100000 -; SSE4-NEXT: jne LBB31_41 -; SSE4-NEXT: LBB31_42: ## %else40 -; SSE4-NEXT: testl $2097152, %edi ## imm = 0x200000 -; SSE4-NEXT: jne LBB31_43 -; SSE4-NEXT: LBB31_44: ## %else42 -; SSE4-NEXT: testl $4194304, %edi ## imm = 0x400000 -; SSE4-NEXT: je LBB31_46 -; SSE4-NEXT: LBB31_45: ## %cond.store43 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 88(%rdx) -; SSE4-NEXT: LBB31_46: ## %else44 -; SSE4-NEXT: movb $1, %al -; SSE4-NEXT: testb %al, %al -; SSE4-NEXT: jne LBB31_48 -; SSE4-NEXT: ## %bb.47: ## %cond.store45 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 92(%rdx) -; SSE4-NEXT: LBB31_48: ## %else46 -; SSE4-NEXT: popq %rbx -; SSE4-NEXT: popq %r12 -; SSE4-NEXT: popq %r13 -; SSE4-NEXT: popq %r14 -; SSE4-NEXT: popq 
%r15 -; SSE4-NEXT: popq %rbp -; SSE4-NEXT: retq -; SSE4-NEXT: LBB31_1: ## %cond.store -; SSE4-NEXT: movl (%rsi), %esi -; SSE4-NEXT: movl %esi, (%rdx) -; SSE4-NEXT: testb $2, %dil -; SSE4-NEXT: je LBB31_4 -; SSE4-NEXT: LBB31_3: ## %cond.store1 -; SSE4-NEXT: movl %r12d, 4(%rdx) -; SSE4-NEXT: testb $4, %dil -; SSE4-NEXT: je LBB31_6 -; SSE4-NEXT: LBB31_5: ## %cond.store3 -; SSE4-NEXT: movl %r15d, 8(%rdx) -; SSE4-NEXT: testb $8, %dil -; SSE4-NEXT: je LBB31_8 -; SSE4-NEXT: LBB31_7: ## %cond.store5 -; SSE4-NEXT: movl %r14d, 12(%rdx) -; SSE4-NEXT: testb $16, %dil -; SSE4-NEXT: je LBB31_10 -; SSE4-NEXT: LBB31_9: ## %cond.store7 -; SSE4-NEXT: movl %ebp, 16(%rdx) -; SSE4-NEXT: testb $32, %dil -; SSE4-NEXT: je LBB31_12 -; SSE4-NEXT: LBB31_11: ## %cond.store9 -; SSE4-NEXT: movl %ebx, 20(%rdx) -; SSE4-NEXT: testb $64, %dil -; SSE4-NEXT: je LBB31_14 -; SSE4-NEXT: LBB31_13: ## %cond.store11 -; SSE4-NEXT: movl %r11d, 24(%rdx) -; SSE4-NEXT: testb %dil, %dil -; SSE4-NEXT: jns LBB31_16 -; SSE4-NEXT: LBB31_15: ## %cond.store13 -; SSE4-NEXT: movl %r10d, 28(%rdx) -; SSE4-NEXT: testl $256, %edi ## imm = 0x100 -; SSE4-NEXT: je LBB31_18 -; SSE4-NEXT: LBB31_17: ## %cond.store15 -; SSE4-NEXT: movl %r9d, 32(%rdx) -; SSE4-NEXT: testl $512, %edi ## imm = 0x200 -; SSE4-NEXT: je LBB31_20 -; SSE4-NEXT: LBB31_19: ## %cond.store17 -; SSE4-NEXT: movl %r8d, 36(%rdx) -; SSE4-NEXT: testl $1024, %edi ## imm = 0x400 -; SSE4-NEXT: je LBB31_22 -; SSE4-NEXT: LBB31_21: ## %cond.store19 -; SSE4-NEXT: movl %ecx, 40(%rdx) -; SSE4-NEXT: testl $2048, %edi ## imm = 0x800 -; SSE4-NEXT: je LBB31_24 -; SSE4-NEXT: LBB31_23: ## %cond.store21 -; SSE4-NEXT: movl %eax, 44(%rdx) -; SSE4-NEXT: testl $4096, %edi ## imm = 0x1000 -; SSE4-NEXT: je LBB31_26 -; SSE4-NEXT: LBB31_25: ## %cond.store23 -; SSE4-NEXT: movl %r13d, 48(%rdx) -; SSE4-NEXT: testl $8192, %edi ## imm = 0x2000 -; SSE4-NEXT: je LBB31_28 -; SSE4-NEXT: LBB31_27: ## %cond.store25 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl 
%eax, 52(%rdx) -; SSE4-NEXT: testl $16384, %edi ## imm = 0x4000 -; SSE4-NEXT: je LBB31_30 -; SSE4-NEXT: LBB31_29: ## %cond.store27 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 56(%rdx) -; SSE4-NEXT: testw %di, %di -; SSE4-NEXT: jns LBB31_32 -; SSE4-NEXT: LBB31_31: ## %cond.store29 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 60(%rdx) -; SSE4-NEXT: testl $65536, %edi ## imm = 0x10000 -; SSE4-NEXT: je LBB31_34 -; SSE4-NEXT: LBB31_33: ## %cond.store31 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 64(%rdx) -; SSE4-NEXT: testl $131072, %edi ## imm = 0x20000 -; SSE4-NEXT: je LBB31_36 -; SSE4-NEXT: LBB31_35: ## %cond.store33 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 68(%rdx) -; SSE4-NEXT: testl $262144, %edi ## imm = 0x40000 -; SSE4-NEXT: je LBB31_38 -; SSE4-NEXT: LBB31_37: ## %cond.store35 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 72(%rdx) -; SSE4-NEXT: testl $524288, %edi ## imm = 0x80000 -; SSE4-NEXT: je LBB31_40 -; SSE4-NEXT: LBB31_39: ## %cond.store37 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 76(%rdx) -; SSE4-NEXT: testl $1048576, %edi ## imm = 0x100000 -; SSE4-NEXT: je LBB31_42 -; SSE4-NEXT: LBB31_41: ## %cond.store39 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 80(%rdx) -; SSE4-NEXT: testl $2097152, %edi ## imm = 0x200000 -; SSE4-NEXT: je LBB31_44 -; SSE4-NEXT: LBB31_43: ## %cond.store41 -; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload -; SSE4-NEXT: movl %eax, 84(%rdx) -; SSE4-NEXT: testl $4194304, %edi ## imm = 0x400000 -; SSE4-NEXT: jne LBB31_45 -; SSE4-NEXT: jmp LBB31_46 +define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigger.ptr, ptr %val.ptr, ptr %dst) nounwind { +; SSE-LABEL: 
store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts: +; SSE: ## %bb.0: +; SSE-NEXT: pushq %rbp +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r13 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: movdqa (%rdi), %xmm1 +; SSE-NEXT: movdqa 32(%rdi), %xmm2 +; SSE-NEXT: movdqa 64(%rdi), %xmm0 +; SSE-NEXT: movl 92(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 88(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 84(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 80(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 76(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 72(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 68(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 64(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 60(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 56(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: movl 52(%rsi), %eax +; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; SSE-NEXT: packssdw 48(%rdi), %xmm2 +; SSE-NEXT: packssdw 16(%rdi), %xmm1 +; SSE-NEXT: packsswb %xmm2, %xmm1 +; SSE-NEXT: packssdw 80(%rdi), %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm1, %eax +; SSE-NEXT: andl $21845, %eax ## imm = 0x5555 +; SSE-NEXT: pmovmskb %xmm0, %edi +; SSE-NEXT: andl $85, %edi +; SSE-NEXT: shll $16, %edi +; SSE-NEXT: orl %eax, %edi +; SSE-NEXT: movl 48(%rsi), %r13d +; SSE-NEXT: testb $1, %dil +; SSE-NEXT: movl 44(%rsi), %eax +; SSE-NEXT: movl 40(%rsi), %ecx +; SSE-NEXT: movl 36(%rsi), %r8d +; SSE-NEXT: movl 32(%rsi), %r9d +; SSE-NEXT: movl 
28(%rsi), %r10d +; SSE-NEXT: movl 24(%rsi), %r11d +; SSE-NEXT: movl 20(%rsi), %ebx +; SSE-NEXT: movl 16(%rsi), %ebp +; SSE-NEXT: movl 12(%rsi), %r14d +; SSE-NEXT: movl 8(%rsi), %r15d +; SSE-NEXT: movl 4(%rsi), %r12d +; SSE-NEXT: jne LBB31_1 +; SSE-NEXT: ## %bb.2: ## %else +; SSE-NEXT: testb $2, %dil +; SSE-NEXT: jne LBB31_3 +; SSE-NEXT: LBB31_4: ## %else2 +; SSE-NEXT: testb $4, %dil +; SSE-NEXT: jne LBB31_5 +; SSE-NEXT: LBB31_6: ## %else4 +; SSE-NEXT: testb $8, %dil +; SSE-NEXT: jne LBB31_7 +; SSE-NEXT: LBB31_8: ## %else6 +; SSE-NEXT: testb $16, %dil +; SSE-NEXT: jne LBB31_9 +; SSE-NEXT: LBB31_10: ## %else8 +; SSE-NEXT: testb $32, %dil +; SSE-NEXT: jne LBB31_11 +; SSE-NEXT: LBB31_12: ## %else10 +; SSE-NEXT: testb $64, %dil +; SSE-NEXT: jne LBB31_13 +; SSE-NEXT: LBB31_14: ## %else12 +; SSE-NEXT: testb %dil, %dil +; SSE-NEXT: js LBB31_15 +; SSE-NEXT: LBB31_16: ## %else14 +; SSE-NEXT: testl $256, %edi ## imm = 0x100 +; SSE-NEXT: jne LBB31_17 +; SSE-NEXT: LBB31_18: ## %else16 +; SSE-NEXT: testl $512, %edi ## imm = 0x200 +; SSE-NEXT: jne LBB31_19 +; SSE-NEXT: LBB31_20: ## %else18 +; SSE-NEXT: testl $1024, %edi ## imm = 0x400 +; SSE-NEXT: jne LBB31_21 +; SSE-NEXT: LBB31_22: ## %else20 +; SSE-NEXT: testl $2048, %edi ## imm = 0x800 +; SSE-NEXT: jne LBB31_23 +; SSE-NEXT: LBB31_24: ## %else22 +; SSE-NEXT: testl $4096, %edi ## imm = 0x1000 +; SSE-NEXT: jne LBB31_25 +; SSE-NEXT: LBB31_26: ## %else24 +; SSE-NEXT: testl $8192, %edi ## imm = 0x2000 +; SSE-NEXT: jne LBB31_27 +; SSE-NEXT: LBB31_28: ## %else26 +; SSE-NEXT: testl $16384, %edi ## imm = 0x4000 +; SSE-NEXT: jne LBB31_29 +; SSE-NEXT: LBB31_30: ## %else28 +; SSE-NEXT: testw %di, %di +; SSE-NEXT: js LBB31_31 +; SSE-NEXT: LBB31_32: ## %else30 +; SSE-NEXT: testl $65536, %edi ## imm = 0x10000 +; SSE-NEXT: jne LBB31_33 +; SSE-NEXT: LBB31_34: ## %else32 +; SSE-NEXT: testl $131072, %edi ## imm = 0x20000 +; SSE-NEXT: jne LBB31_35 +; SSE-NEXT: LBB31_36: ## %else34 +; SSE-NEXT: testl $262144, %edi ## imm = 0x40000 +; SSE-NEXT: jne 
LBB31_37 +; SSE-NEXT: LBB31_38: ## %else36 +; SSE-NEXT: testl $524288, %edi ## imm = 0x80000 +; SSE-NEXT: jne LBB31_39 +; SSE-NEXT: LBB31_40: ## %else38 +; SSE-NEXT: testl $1048576, %edi ## imm = 0x100000 +; SSE-NEXT: jne LBB31_41 +; SSE-NEXT: LBB31_42: ## %else40 +; SSE-NEXT: testl $2097152, %edi ## imm = 0x200000 +; SSE-NEXT: jne LBB31_43 +; SSE-NEXT: LBB31_44: ## %else42 +; SSE-NEXT: testl $4194304, %edi ## imm = 0x400000 +; SSE-NEXT: je LBB31_46 +; SSE-NEXT: LBB31_45: ## %cond.store43 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 88(%rdx) +; SSE-NEXT: LBB31_46: ## %else44 +; SSE-NEXT: movb $1, %al +; SSE-NEXT: testb %al, %al +; SSE-NEXT: jne LBB31_48 +; SSE-NEXT: ## %bb.47: ## %cond.store45 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 92(%rdx) +; SSE-NEXT: LBB31_48: ## %else46 +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r13 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: popq %rbp +; SSE-NEXT: retq +; SSE-NEXT: LBB31_1: ## %cond.store +; SSE-NEXT: movl (%rsi), %esi +; SSE-NEXT: movl %esi, (%rdx) +; SSE-NEXT: testb $2, %dil +; SSE-NEXT: je LBB31_4 +; SSE-NEXT: LBB31_3: ## %cond.store1 +; SSE-NEXT: movl %r12d, 4(%rdx) +; SSE-NEXT: testb $4, %dil +; SSE-NEXT: je LBB31_6 +; SSE-NEXT: LBB31_5: ## %cond.store3 +; SSE-NEXT: movl %r15d, 8(%rdx) +; SSE-NEXT: testb $8, %dil +; SSE-NEXT: je LBB31_8 +; SSE-NEXT: LBB31_7: ## %cond.store5 +; SSE-NEXT: movl %r14d, 12(%rdx) +; SSE-NEXT: testb $16, %dil +; SSE-NEXT: je LBB31_10 +; SSE-NEXT: LBB31_9: ## %cond.store7 +; SSE-NEXT: movl %ebp, 16(%rdx) +; SSE-NEXT: testb $32, %dil +; SSE-NEXT: je LBB31_12 +; SSE-NEXT: LBB31_11: ## %cond.store9 +; SSE-NEXT: movl %ebx, 20(%rdx) +; SSE-NEXT: testb $64, %dil +; SSE-NEXT: je LBB31_14 +; SSE-NEXT: LBB31_13: ## %cond.store11 +; SSE-NEXT: movl %r11d, 24(%rdx) +; SSE-NEXT: testb %dil, %dil +; SSE-NEXT: jns LBB31_16 +; SSE-NEXT: LBB31_15: ## %cond.store13 +; SSE-NEXT: movl 
%r10d, 28(%rdx) +; SSE-NEXT: testl $256, %edi ## imm = 0x100 +; SSE-NEXT: je LBB31_18 +; SSE-NEXT: LBB31_17: ## %cond.store15 +; SSE-NEXT: movl %r9d, 32(%rdx) +; SSE-NEXT: testl $512, %edi ## imm = 0x200 +; SSE-NEXT: je LBB31_20 +; SSE-NEXT: LBB31_19: ## %cond.store17 +; SSE-NEXT: movl %r8d, 36(%rdx) +; SSE-NEXT: testl $1024, %edi ## imm = 0x400 +; SSE-NEXT: je LBB31_22 +; SSE-NEXT: LBB31_21: ## %cond.store19 +; SSE-NEXT: movl %ecx, 40(%rdx) +; SSE-NEXT: testl $2048, %edi ## imm = 0x800 +; SSE-NEXT: je LBB31_24 +; SSE-NEXT: LBB31_23: ## %cond.store21 +; SSE-NEXT: movl %eax, 44(%rdx) +; SSE-NEXT: testl $4096, %edi ## imm = 0x1000 +; SSE-NEXT: je LBB31_26 +; SSE-NEXT: LBB31_25: ## %cond.store23 +; SSE-NEXT: movl %r13d, 48(%rdx) +; SSE-NEXT: testl $8192, %edi ## imm = 0x2000 +; SSE-NEXT: je LBB31_28 +; SSE-NEXT: LBB31_27: ## %cond.store25 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 52(%rdx) +; SSE-NEXT: testl $16384, %edi ## imm = 0x4000 +; SSE-NEXT: je LBB31_30 +; SSE-NEXT: LBB31_29: ## %cond.store27 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 56(%rdx) +; SSE-NEXT: testw %di, %di +; SSE-NEXT: jns LBB31_32 +; SSE-NEXT: LBB31_31: ## %cond.store29 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 60(%rdx) +; SSE-NEXT: testl $65536, %edi ## imm = 0x10000 +; SSE-NEXT: je LBB31_34 +; SSE-NEXT: LBB31_33: ## %cond.store31 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 64(%rdx) +; SSE-NEXT: testl $131072, %edi ## imm = 0x20000 +; SSE-NEXT: je LBB31_36 +; SSE-NEXT: LBB31_35: ## %cond.store33 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 68(%rdx) +; SSE-NEXT: testl $262144, %edi ## imm = 0x40000 +; SSE-NEXT: je LBB31_38 +; SSE-NEXT: LBB31_37: ## %cond.store35 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 72(%rdx) +; 
SSE-NEXT: testl $524288, %edi ## imm = 0x80000 +; SSE-NEXT: je LBB31_40 +; SSE-NEXT: LBB31_39: ## %cond.store37 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 76(%rdx) +; SSE-NEXT: testl $1048576, %edi ## imm = 0x100000 +; SSE-NEXT: je LBB31_42 +; SSE-NEXT: LBB31_41: ## %cond.store39 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 80(%rdx) +; SSE-NEXT: testl $2097152, %edi ## imm = 0x200000 +; SSE-NEXT: je LBB31_44 +; SSE-NEXT: LBB31_43: ## %cond.store41 +; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload +; SSE-NEXT: movl %eax, 84(%rdx) +; SSE-NEXT: testl $4194304, %edi ## imm = 0x400000 +; SSE-NEXT: jne LBB31_45 +; SSE-NEXT: jmp LBB31_46 ; ; AVX1-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts: ; AVX1: ## %bb.0: @@ -6266,7 +6021,7 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge } ; From https://reviews.llvm.org/rGf8d9097168b7#1165311 -define void @undefshuffle(<8 x i1> %i0, ptr %src, ptr %dst) #0 { +define void @undefshuffle(<8 x i1> %i0, ptr %src, ptr %dst) nounwind { ; SSE2-LABEL: undefshuffle: ; SSE2: ## %bb.0: ## %else ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll index 426f4ee..6a5770a 100644 --- a/llvm/test/CodeGen/X86/pr45378.ll +++ b/llvm/test/CodeGen/X86/pr45378.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s 
--check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=CHECK,AVX declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) @@ -71,28 +71,12 @@ define i1 @parseHeaders2_scalar_or(ptr %ptr) nounwind { } define i1 @parseHeaders2_scalar_and(ptr %ptr) nounwind { -; SSE2-LABEL: parseHeaders2_scalar_and: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqu (%rdi), %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE2-NEXT: movq %xmm0, %rax -; SSE2-NEXT: testq %rax, (%rdi) -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; SSE41-LABEL: parseHeaders2_scalar_and: -; SSE41: # %bb.0: -; SSE41-NEXT: movq (%rdi), %rax -; SSE41-NEXT: testq %rax, 8(%rdi) -; SSE41-NEXT: sete %al -; SSE41-NEXT: retq -; -; AVX-LABEL: parseHeaders2_scalar_and: -; AVX: # %bb.0: -; AVX-NEXT: movq (%rdi), %rax -; AVX-NEXT: testq %rax, 8(%rdi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; CHECK-LABEL: parseHeaders2_scalar_and: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: testq %rax, 8(%rdi) +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq %vload = load <2 x i64>, ptr %ptr, align 8 %v1 = extractelement <2 x i64> %vload, i32 0 %v2 = extractelement <2 x i64> %vload, i32 1 diff --git a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll index 2187c65..97c3c204 100644 --- 
a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll +++ b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll @@ -60,36 +60,30 @@ define void @failing(ptr %0, ptr %1) nounwind { ; CHECK-NEXT: .LBB0_2: # %vector.body ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movdqu 1024(%rdx,%rdi), %xmm5 -; CHECK-NEXT: movdqu 1040(%rdx,%rdi), %xmm6 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] -; CHECK-NEXT: movq %xmm5, %r8 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3] -; CHECK-NEXT: movq %xmm5, %r9 -; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi -; CHECK-NEXT: movq %rcx, %r10 -; CHECK-NEXT: sbbq %r9, %r10 -; CHECK-NEXT: setge %r9b -; CHECK-NEXT: movzbl %r9b, %r9d -; CHECK-NEXT: andl $1, %r9d -; CHECK-NEXT: negq %r9 -; CHECK-NEXT: movq %r9, %xmm5 ; CHECK-NEXT: cmpq 1024(%rdx,%rdi), %rsi -; CHECK-NEXT: movq %rcx, %r9 -; CHECK-NEXT: sbbq %r8, %r9 +; CHECK-NEXT: movq %rcx, %r8 +; CHECK-NEXT: sbbq 1032(%rdx,%rdi), %r8 +; CHECK-NEXT: setge %r8b +; CHECK-NEXT: movzbl %r8b, %r8d +; CHECK-NEXT: andl $1, %r8d +; CHECK-NEXT: negq %r8 +; CHECK-NEXT: movq %r8, %xmm5 +; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi +; CHECK-NEXT: movq %rcx, %r8 +; CHECK-NEXT: sbbq 1048(%rdx,%rdi), %r8 ; CHECK-NEXT: setge %r8b ; CHECK-NEXT: movzbl %r8b, %r8d ; CHECK-NEXT: andl $1, %r8d ; CHECK-NEXT: negq %r8 ; CHECK-NEXT: movq %r8, %xmm6 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0] -; CHECK-NEXT: movdqa %xmm1, %xmm5 -; CHECK-NEXT: psllq %xmm4, %xmm5 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0] +; CHECK-NEXT: movdqa %xmm1, %xmm6 +; CHECK-NEXT: psllq %xmm4, %xmm6 ; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3] ; CHECK-NEXT: movdqa %xmm1, %xmm8 ; CHECK-NEXT: psllq %xmm7, %xmm8 -; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1] -; CHECK-NEXT: andpd %xmm6, %xmm8 +; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm6[0],xmm8[1] +; CHECK-NEXT: andpd %xmm5, %xmm8 ; CHECK-NEXT: orpd %xmm8, %xmm3 ; CHECK-NEXT: paddq %xmm2, %xmm4 ; CHECK-NEXT: addq 
$32, %rdi diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index 2610f43..62051d1 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1983,91 +1983,75 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movzwl 16(%eax), %edx ; X86-SSE-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-SSE-NEXT: movdqa (%eax), %xmm3 -; X86-SSE-NEXT: movdqa (%ecx), %xmm0 -; X86-SSE-NEXT: movdqa 16(%ecx), %xmm1 -; X86-SSE-NEXT: pxor %xmm5, %xmm5 -; X86-SSE-NEXT: movdqa %xmm3, %xmm2 -; X86-SSE-NEXT: pextrw $7, %xmm3, %eax -; X86-SSE-NEXT: pextrw $4, %xmm3, %edi -; X86-SSE-NEXT: pextrw $0, %xmm3, %ebp -; X86-SSE-NEXT: pextrw $1, %xmm3, %esi -; X86-SSE-NEXT: pextrw $3, %xmm3, %ebx -; X86-SSE-NEXT: movdqa %xmm3, %xmm4 -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] -; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X86-SSE-NEXT: movd %xmm3, %ecx +; X86-SSE-NEXT: movdqa (%eax), %xmm2 +; X86-SSE-NEXT: pxor %xmm1, %xmm1 +; X86-SSE-NEXT: movdqa %xmm2, %xmm0 +; X86-SSE-NEXT: pextrw $7, %xmm2, %eax +; X86-SSE-NEXT: pextrw $4, %xmm2, %esi +; X86-SSE-NEXT: pextrw $1, %xmm2, %edi +; X86-SSE-NEXT: pextrw $0, %xmm2, %ebx +; X86-SSE-NEXT: pextrw $3, %xmm2, %ebp +; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: divl 28(%ecx) +; X86-SSE-NEXT: movd %edx, %xmm1 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X86-SSE-NEXT: movd %xmm3, %eax ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx +; X86-SSE-NEXT: divl 24(%ecx) ; X86-SSE-NEXT: movd %edx, %xmm3 -; X86-SSE-NEXT: 
pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] -; X86-SSE-NEXT: movd %xmm5, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3] -; X86-SSE-NEXT: movd %xmm5, %ecx +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; X86-SSE-NEXT: movl %esi, %eax ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm5 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; X86-SSE-NEXT: divl 16(%ecx) +; X86-SSE-NEXT: movd %edx, %xmm1 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE-NEXT: movd %xmm0, %eax +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: divl 20(%ecx) +; X86-SSE-NEXT: movd %edx, %xmm0 +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; X86-SSE-NEXT: movl %edi, %eax ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: divl 16(%edi) +; X86-SSE-NEXT: divl 4(%ecx) ; X86-SSE-NEXT: movd %edx, %xmm3 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X86-SSE-NEXT: movd %xmm2, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X86-SSE-NEXT: movd %xmm1, %ecx +; X86-SSE-NEXT: movl %ebx, %eax ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm1 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] +; X86-SSE-NEXT: divl (%ecx) +; X86-SSE-NEXT: movd %edx, %xmm0 +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] ; X86-SSE-NEXT: movl %ebp, %eax ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl (%edi) -; X86-SSE-NEXT: movd %edx, %xmm1 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X86-SSE-NEXT: movd %xmm2, %ecx -; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm2 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X86-SSE-NEXT: movd %xmm2, %ecx -; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: divl 12(%ecx) +; X86-SSE-NEXT: movd %edx, %xmm3 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; X86-SSE-NEXT: movd %xmm2, %eax ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx +; X86-SSE-NEXT: divl 8(%ecx) ; X86-SSE-NEXT: movd %edx, %xmm2 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] -; X86-SSE-NEXT: movd %xmm4, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X86-SSE-NEXT: movd %xmm0, %ecx -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl 32(%edi) +; X86-SSE-NEXT: divl 32(%ecx) ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] -; X86-SSE-NEXT: pmuludq %xmm2, %xmm1 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] -; X86-SSE-NEXT: pmuludq %xmm2, %xmm4 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X86-SSE-NEXT: pmuludq %xmm2, %xmm0 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X86-SSE-NEXT: pmuludq %xmm2, %xmm3 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] ; X86-SSE-NEXT: pmuludq %xmm2, %xmm1 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; 
X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; X86-SSE-NEXT: pmuludq %xmm2, %xmm3 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007 ; X86-SSE-NEXT: movl %eax, (%eax) -; X86-SSE-NEXT: movdqa %xmm3, (%eax) +; X86-SSE-NEXT: movdqa %xmm1, (%eax) ; X86-SSE-NEXT: movdqa %xmm0, (%eax) ; X86-SSE-NEXT: addl $4, %esp ; X86-SSE-NEXT: popl %esi @@ -2204,91 +2188,76 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind { ; X64-SSE-LABEL: PR34947: ; X64-SSE: # %bb.0: ; X64-SSE-NEXT: movzwl 16(%rdi), %ecx -; X64-SSE-NEXT: movdqa (%rdi), %xmm3 -; X64-SSE-NEXT: movdqa (%rsi), %xmm0 -; X64-SSE-NEXT: movdqa 16(%rsi), %xmm1 -; X64-SSE-NEXT: pxor %xmm5, %xmm5 -; X64-SSE-NEXT: movdqa %xmm3, %xmm2 -; X64-SSE-NEXT: pextrw $7, %xmm3, %eax -; X64-SSE-NEXT: pextrw $4, %xmm3, %r8d -; X64-SSE-NEXT: pextrw $0, %xmm3, %r10d -; X64-SSE-NEXT: pextrw $1, %xmm3, %edi -; X64-SSE-NEXT: pextrw $3, %xmm3, %r9d -; X64-SSE-NEXT: movdqa %xmm3, %xmm4 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] -; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X64-SSE-NEXT: movd %xmm3, %r11d -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %r11d -; X64-SSE-NEXT: movd %edx, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] -; X64-SSE-NEXT: movd %xmm5, %eax -; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3] -; X64-SSE-NEXT: movd %xmm5, %r11d +; X64-SSE-NEXT: movdqa (%rdi), %xmm2 +; X64-SSE-NEXT: pxor %xmm1, %xmm1 +; X64-SSE-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE-NEXT: pextrw $7, %xmm2, %eax +; X64-SSE-NEXT: pextrw $4, %xmm2, %edi +; X64-SSE-NEXT: pextrw $1, %xmm2, %r8d +; X64-SSE-NEXT: pextrw $0, %xmm2, %r9d +; X64-SSE-NEXT: pextrw $3, %xmm2, %r10d +; X64-SSE-NEXT: punpcklwd 
{{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %r11d -; X64-SSE-NEXT: movd %edx, %xmm5 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] -; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: divl 28(%rsi) +; X64-SSE-NEXT: movd %edx, %xmm1 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X64-SSE-NEXT: movd %xmm3, %eax ; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl 16(%rsi) +; X64-SSE-NEXT: divl 24(%rsi) ; X64-SSE-NEXT: movd %edx, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X64-SSE-NEXT: movd %xmm2, %eax -; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X64-SSE-NEXT: movd %xmm1, %r8d -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %r8d -; X64-SSE-NEXT: movd %edx, %xmm1 ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] -; X64-SSE-NEXT: movl %r10d, %eax +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl (%rsi) +; X64-SSE-NEXT: divl 16(%rsi) ; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X64-SSE-NEXT: movd %xmm2, %r8d -; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-SSE-NEXT: movd %xmm0, %eax ; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %r8d -; X64-SSE-NEXT: movd %edx, %xmm2 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X64-SSE-NEXT: movd %xmm2, %edi +; X64-SSE-NEXT: divl 20(%rsi) +; X64-SSE-NEXT: movd %edx, %xmm0 +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: xorl %edx, %edx +; 
X64-SSE-NEXT: divl 4(%rsi) +; X64-SSE-NEXT: movd %edx, %xmm0 ; X64-SSE-NEXT: movl %r9d, %eax ; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %edi -; X64-SSE-NEXT: movd %edx, %xmm2 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] -; X64-SSE-NEXT: movd %xmm4, %eax -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-SSE-NEXT: movd %xmm0, %edi +; X64-SSE-NEXT: divl (%rsi) +; X64-SSE-NEXT: movd %edx, %xmm3 +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; X64-SSE-NEXT: movl %r10d, %eax ; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %edi +; X64-SSE-NEXT: divl 12(%rsi) ; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; X64-SSE-NEXT: movd %xmm2, %eax +; X64-SSE-NEXT: xorl %edx, %edx +; X64-SSE-NEXT: divl 8(%rsi) +; X64-SSE-NEXT: movd %edx, %xmm2 +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; X64-SSE-NEXT: movl %ecx, %eax ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl 32(%rsi) ; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; X64-SSE-NEXT: pmuludq %xmm0, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; X64-SSE-NEXT: pmuludq %xmm0, %xmm2 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] ; X64-SSE-NEXT: pmuludq %xmm0, %xmm3 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] ; X64-SSE-NEXT: pmuludq %xmm0, %xmm2 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] +; X64-SSE-NEXT: pmuludq %xmm0, %xmm1 +; X64-SSE-NEXT: 
pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X64-SSE-NEXT: pmuludq %xmm0, %xmm2 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007 ; X64-SSE-NEXT: movl %eax, (%rax) -; X64-SSE-NEXT: movdqa %xmm3, (%rax) ; X64-SSE-NEXT: movdqa %xmm1, (%rax) +; X64-SSE-NEXT: movdqa %xmm3, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX1-LABEL: PR34947: diff --git a/llvm/test/CodeGen/X86/stack-protector.ll b/llvm/test/CodeGen/X86/stack-protector.ll index a277f9f..f4f3ae4 100644 --- a/llvm/test/CodeGen/X86/stack-protector.ll +++ b/llvm/test/CodeGen/X86/stack-protector.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-I386 %s ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-X64 %s ; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-KERNEL-X64 %s +; RUN: llc -code-model=kernel -mtriple=x86_64-unknown-freebsd < %s -o - | FileCheck --check-prefix=FREEBSD-KERNEL-X64 %s ; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | FileCheck --check-prefix=DARWIN-X64 %s ; RUN: llc -mtriple=amd64-pc-openbsd < %s -o - | FileCheck --check-prefix=OPENBSD-AMD64 %s ; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s @@ -75,6 +76,10 @@ entry: ; LINUX-X64: mov{{l|q}} %fs: ; LINUX-X64: callq __stack_chk_fail +; FREEBSD-KERNEL-X64-LABEL: test1b: +; FREEBSD-KERNEL-X64-NOT: mov{{l|q}} __stack_chk_guard@GOTPCREL +; FREEBSD-KERNEL-X64: callq __stack_chk_fail + ; LINUX-KERNEL-X64-LABEL: test1b: ; LINUX-KERNEL-X64: mov{{l|q}} %gs: ; LINUX-KERNEL-X64: callq __stack_chk_fail @@ -118,6 +123,10 @@ entry: ; LINUX-X64: mov{{l|q}} %fs: ; LINUX-X64: callq __stack_chk_fail +; FREEBSD-KERNEL-X64-LABEL: test1c: +; FREEBSD-KERNEL-X64: mov{{l|q}} __stack_chk_guard(%rip) 
+; FREEBSD-KERNEL-X64: callq __stack_chk_fail + ; LINUX-KERNEL-X64-LABEL: test1c: ; LINUX-KERNEL-X64: mov{{l|q}} %gs: ; LINUX-KERNEL-X64: callq __stack_chk_fail diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index 99a3821..f2240a9 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -1101,17 +1101,13 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in define void @indices_convert() { ; SSE3-LABEL: indices_convert: ; SSE3: # %bb.0: # %bb -; SSE3-NEXT: movdqa (%rax), %xmm0 -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; SSE3-NEXT: movd %xmm1, %eax -; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movaps (%rax), %xmm0 +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movl (%rax), %eax +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE3-NEXT: andl $3, %eax -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; SSE3-NEXT: movd %xmm1, %ecx -; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE3-NEXT: andl $3, %ecx ; SSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE3-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] @@ -1120,17 +1116,13 @@ define void @indices_convert() { ; ; SSSE3-LABEL: indices_convert: ; SSSE3: # %bb.0: # %bb -; SSSE3-NEXT: movdqa (%rax), %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; SSSE3-NEXT: movd %xmm1, %eax -; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps (%rax), %xmm0 +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movl (%rax), %eax +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; 
SSSE3-NEXT: andl $3, %eax -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; SSSE3-NEXT: movd %xmm1, %ecx -; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSSE3-NEXT: andl $3, %ecx ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSSE3-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index 7bbcdee..e26de4b 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -2911,23 +2911,12 @@ define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) { ; define <2 x double> @sitofp_load_2i64_to_2f64(ptr%a) { -; SSE2-LABEL: sitofp_load_2i64_to_2f64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm1 -; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2sd %rax, %xmm1 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE2-NEXT: retq -; -; SSE41-LABEL: sitofp_load_2i64_to_2f64: -; SSE41: # %bb.0: -; SSE41-NEXT: cvtsi2sdq 8(%rdi), %xmm1 -; SSE41-NEXT: cvtsi2sdq (%rdi), %xmm0 -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE41-NEXT: retq +; SSE-LABEL: sitofp_load_2i64_to_2f64: +; SSE: # %bb.0: +; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1 +; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq ; ; VEX-LABEL: sitofp_load_2i64_to_2f64: ; VEX: # %bb.0: @@ -3093,35 +3082,16 @@ define <2 x double> @sitofp_load_2i8_to_2f64(ptr%a) { } define <4 x double> @sitofp_load_4i64_to_4f64(ptr%a) { -; SSE2-LABEL: sitofp_load_4i64_to_4f64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm1 -; SSE2-NEXT: movdqa 16(%rdi), %xmm2 -; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; 
SSE2-NEXT: cvtsi2sd %rax, %xmm1 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2sdq 16(%rdi), %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] -; SSE2-NEXT: movq %xmm2, %rax -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: cvtsi2sd %rax, %xmm2 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; SSE2-NEXT: retq -; -; SSE41-LABEL: sitofp_load_4i64_to_4f64: -; SSE41: # %bb.0: -; SSE41-NEXT: cvtsi2sdq 8(%rdi), %xmm1 -; SSE41-NEXT: cvtsi2sdq (%rdi), %xmm0 -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE41-NEXT: cvtsi2sdq 24(%rdi), %xmm2 -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: cvtsi2sdq 16(%rdi), %xmm1 -; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; SSE41-NEXT: retq +; SSE-LABEL: sitofp_load_4i64_to_4f64: +; SSE: # %bb.0: +; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1 +; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: cvtsi2sdq 24(%rdi), %xmm2 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2sdq 16(%rdi), %xmm1 +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: retq ; ; VEX-LABEL: sitofp_load_4i64_to_4f64: ; VEX: # %bb.0: @@ -3865,22 +3835,14 @@ define <4 x double> @uitofp_load_4i8_to_4f64(ptr%a) { define <4 x float> @sitofp_load_4i64_to_4f32(ptr%a) { ; SSE2-LABEL: sitofp_load_4i64_to_4f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm1 -; SSE2-NEXT: movdqa 16(%rdi), %xmm0 -; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE2-NEXT: movq %xmm0, %rax -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0 +; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2 ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 -; SSE2-NEXT: 
pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_load_4i64_to_4f32: @@ -4015,39 +3977,24 @@ define <4 x float> @sitofp_load_4i8_to_4f32(ptr%a) { define <8 x float> @sitofp_load_8i64_to_8f32(ptr%a) { ; SSE2-LABEL: sitofp_load_8i64_to_8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm1 -; SSE2-NEXT: movdqa 16(%rdi), %xmm0 -; SSE2-NEXT: movdqa 32(%rdi), %xmm2 -; SSE2-NEXT: movdqa 48(%rdi), %xmm3 -; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE2-NEXT: movq %xmm0, %rax -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] +; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0 +; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2 ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] -; SSE2-NEXT: xorps %xmm4, %xmm4 -; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] -; SSE2-NEXT: movq %xmm1, %rax +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; 
SSE2-NEXT: cvtsi2ssq 56(%rdi), %xmm1 +; SSE2-NEXT: xorps %xmm2, %xmm2 +; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm2 +; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: cvtsi2ssq 40(%rdi), %xmm3 ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq 32(%rdi), %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] -; SSE2-NEXT: movq %xmm2, %rax -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: cvtsi2ss %rax, %xmm2 -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_load_8i64_to_8f32: @@ -4256,70 +4203,64 @@ define <8 x float> @sitofp_load_8i8_to_8f32(ptr%a) { define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) { ; SSE2-LABEL: uitofp_load_4i64_to_4f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa 16(%rdi), %xmm0 -; SSE2-NEXT: movq 16(%rdi), %rax +; SSE2-NEXT: movq 24(%rdi), %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB83_1 ; SSE2-NEXT: # %bb.2: -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 ; SSE2-NEXT: jmp .LBB83_3 ; SSE2-NEXT: .LBB83_1: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: addss %xmm1, %xmm1 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: addss %xmm0, %xmm0 ; SSE2-NEXT: .LBB83_3: -; SSE2-NEXT: movq (%rdi), %rax -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE2-NEXT: movq %xmm0, %rcx -; SSE2-NEXT: testq %rcx, %rcx +; SSE2-NEXT: movq 16(%rdi), %rax +; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB83_4 ; SSE2-NEXT: # %bb.5: -; SSE2-NEXT: cvtsi2ss %rcx, %xmm2 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 ; SSE2-NEXT: jmp .LBB83_6 ; SSE2-NEXT: .LBB83_4: +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shrq %rcx +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: orq %rcx, %rax +; 
SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: addss %xmm1, %xmm1 +; SSE2-NEXT: .LBB83_6: +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq 8(%rdi), %rcx +; SSE2-NEXT: testq %rcx, %rcx +; SSE2-NEXT: js .LBB83_7 +; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: cvtsi2ss %rcx, %xmm2 +; SSE2-NEXT: jmp .LBB83_9 +; SSE2-NEXT: .LBB83_7: ; SSE2-NEXT: movq %rcx, %rdx ; SSE2-NEXT: shrq %rdx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: orq %rdx, %rcx ; SSE2-NEXT: cvtsi2ss %rcx, %xmm2 ; SSE2-NEXT: addss %xmm2, %xmm2 -; SSE2-NEXT: .LBB83_6: -; SSE2-NEXT: movdqa (%rdi), %xmm3 -; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB83_7 -; SSE2-NEXT: # %bb.8: -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: jmp .LBB83_9 -; SSE2-NEXT: .LBB83_7: -; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shrq %rcx -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: addss %xmm0, %xmm0 ; SSE2-NEXT: .LBB83_9: -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] -; SSE2-NEXT: movq %xmm2, %rax +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB83_10 ; SSE2-NEXT: # %bb.11: -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: cvtsi2ss %rax, %xmm2 +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 ; SSE2-NEXT: jmp .LBB83_12 ; SSE2-NEXT: .LBB83_10: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: cvtsi2ss %rax, %xmm2 -; SSE2-NEXT: addss %xmm2, %xmm2 +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: addss %xmm0, %xmm0 ; SSE2-NEXT: .LBB83_12: ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -4591,8 +4532,7 @@ define <4 x float> 
@uitofp_load_4i8_to_4f32(ptr%a) { define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) { ; SSE2-LABEL: uitofp_load_8i64_to_8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa 16(%rdi), %xmm0 -; SSE2-NEXT: movq 16(%rdi), %rax +; SSE2-NEXT: movq 24(%rdi), %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB87_1 ; SSE2-NEXT: # %bb.2: @@ -4606,127 +4546,114 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) { ; SSE2-NEXT: cvtsi2ss %rax, %xmm2 ; SSE2-NEXT: addss %xmm2, %xmm2 ; SSE2-NEXT: .LBB87_3: -; SSE2-NEXT: movq (%rdi), %rax -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE2-NEXT: movq %xmm0, %rcx -; SSE2-NEXT: testq %rcx, %rcx +; SSE2-NEXT: movq 16(%rdi), %rax +; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB87_4 ; SSE2-NEXT: # %bb.5: -; SSE2-NEXT: cvtsi2ss %rcx, %xmm1 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 ; SSE2-NEXT: jmp .LBB87_6 ; SSE2-NEXT: .LBB87_4: -; SSE2-NEXT: movq %rcx, %rdx -; SSE2-NEXT: shrq %rdx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: orq %rdx, %rcx -; SSE2-NEXT: cvtsi2ss %rcx, %xmm1 +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shrq %rcx +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: orq %rcx, %rax +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 ; SSE2-NEXT: addss %xmm1, %xmm1 ; SSE2-NEXT: .LBB87_6: -; SSE2-NEXT: movdqa (%rdi), %xmm3 -; SSE2-NEXT: testq %rax, %rax +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq 8(%rdi), %rcx +; SSE2-NEXT: testq %rcx, %rcx ; SSE2-NEXT: js .LBB87_7 ; SSE2-NEXT: # %bb.8: -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: jmp .LBB87_9 -; SSE2-NEXT: .LBB87_7: +; SSE2-NEXT: cvtsi2ss %rcx, %xmm3 +; SSE2-NEXT: testq %rax, %rax +; SSE2-NEXT: jns .LBB87_11 +; SSE2-NEXT: .LBB87_10: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 ; SSE2-NEXT: addss %xmm0, %xmm0 -; SSE2-NEXT: .LBB87_9: -; SSE2-NEXT: movq 48(%rdi), %rax -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] -; SSE2-NEXT: movq 
%xmm3, %rcx -; SSE2-NEXT: testq %rcx, %rcx -; SSE2-NEXT: js .LBB87_10 -; SSE2-NEXT: # %bb.11: -; SSE2-NEXT: cvtsi2ss %rcx, %xmm4 ; SSE2-NEXT: jmp .LBB87_12 -; SSE2-NEXT: .LBB87_10: +; SSE2-NEXT: .LBB87_7: ; SSE2-NEXT: movq %rcx, %rdx ; SSE2-NEXT: shrq %rdx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: orq %rdx, %rcx -; SSE2-NEXT: cvtsi2ss %rcx, %xmm4 -; SSE2-NEXT: addss %xmm4, %xmm4 +; SSE2-NEXT: cvtsi2ss %rcx, %xmm3 +; SSE2-NEXT: addss %xmm3, %xmm3 +; SSE2-NEXT: testq %rax, %rax +; SSE2-NEXT: js .LBB87_10 +; SSE2-NEXT: .LBB87_11: +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 ; SSE2-NEXT: .LBB87_12: -; SSE2-NEXT: movdqa 48(%rdi), %xmm5 +; SSE2-NEXT: movq 56(%rdi), %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB87_13 ; SSE2-NEXT: # %bb.14: -; SSE2-NEXT: xorps %xmm3, %xmm3 -; SSE2-NEXT: cvtsi2ss %rax, %xmm3 +; SSE2-NEXT: cvtsi2ss %rax, %xmm5 ; SSE2-NEXT: jmp .LBB87_15 ; SSE2-NEXT: .LBB87_13: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm3, %xmm3 -; SSE2-NEXT: cvtsi2ss %rax, %xmm3 -; SSE2-NEXT: addss %xmm3, %xmm3 +; SSE2-NEXT: cvtsi2ss %rax, %xmm5 +; SSE2-NEXT: addss %xmm5, %xmm5 ; SSE2-NEXT: .LBB87_15: -; SSE2-NEXT: movq 32(%rdi), %rax -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] -; SSE2-NEXT: movq %xmm5, %rcx -; SSE2-NEXT: testq %rcx, %rcx +; SSE2-NEXT: movq 48(%rdi), %rax +; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB87_16 ; SSE2-NEXT: # %bb.17: -; SSE2-NEXT: xorps %xmm5, %xmm5 -; SSE2-NEXT: cvtsi2ss %rcx, %xmm5 +; SSE2-NEXT: cvtsi2ss %rax, %xmm4 ; SSE2-NEXT: jmp .LBB87_18 ; SSE2-NEXT: .LBB87_16: -; SSE2-NEXT: movq %rcx, %rdx -; SSE2-NEXT: shrq %rdx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: orq %rdx, %rcx -; SSE2-NEXT: xorps %xmm5, %xmm5 -; SSE2-NEXT: cvtsi2ss %rcx, %xmm5 -; SSE2-NEXT: addss %xmm5, %xmm5 +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shrq %rcx +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: orq %rcx, %rax +; SSE2-NEXT: cvtsi2ss %rax, %xmm4 +; SSE2-NEXT: addss %xmm4, %xmm4 
; SSE2-NEXT: .LBB87_18: -; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] -; SSE2-NEXT: movdqa 32(%rdi), %xmm4 +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE2-NEXT: movq 40(%rdi), %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB87_19 ; SSE2-NEXT: # %bb.20: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: xorps %xmm2, %xmm2 +; SSE2-NEXT: cvtsi2ss %rax, %xmm2 ; SSE2-NEXT: jmp .LBB87_21 ; SSE2-NEXT: .LBB87_19: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: addss %xmm1, %xmm1 +; SSE2-NEXT: xorps %xmm2, %xmm2 +; SSE2-NEXT: cvtsi2ss %rax, %xmm2 +; SSE2-NEXT: addss %xmm2, %xmm2 ; SSE2-NEXT: .LBB87_21: -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3] -; SSE2-NEXT: movq %xmm2, %rax +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] +; SSE2-NEXT: movq 32(%rdi), %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB87_22 ; SSE2-NEXT: # %bb.23: -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: cvtsi2ss %rax, %xmm2 +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 ; SSE2-NEXT: jmp .LBB87_24 ; SSE2-NEXT: .LBB87_22: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: cvtsi2ss %rax, %xmm2 -; SSE2-NEXT: addss %xmm2, %xmm2 +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: addss %xmm1, %xmm1 ; SSE2-NEXT: .LBB87_24: ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: uitofp_load_8i64_to_8f32: diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll index 96ac4b6..9133b32 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll @@ -758,7 +758,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef %t, i32 noundef ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -808,7 +808,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef %t, i32 noundef ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -851,7 +851,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 
32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -901,7 +901,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -936,7 +936,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -986,7 +986,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1021,7 +1021,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1071,7 +1071,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1106,7 +1106,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(fp128 noundef %t, i32 nound ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1156,7 +1156,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(fp128 noundef %t, i32 nound ; CHECK-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1191,7 +1191,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1241,7 +1241,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1276,7 +1276,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz([2 x i64] %t.coe ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; 
CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1326,7 +1326,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz([2 x i64] %t.coe ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1361,7 +1361,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz([2 x double] a ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1411,7 +1411,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz([2 x double] a ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1446,7 +1446,7 
@@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz([4 x double] alignst ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1496,7 +1496,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz([4 x double] alignst ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1531,7 +1531,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz([2 x i64] %t.co ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1581,7 +1581,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz([2 x i64] %t.co ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; 
CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1616,7 +1616,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz([2 x fp128] ali ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1666,7 +1666,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz([2 x fp128] ali ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1701,7 +1701,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz([4 x fp128] ali ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = 
ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1751,7 +1751,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz([4 x fp128] ali ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll index 1535fcc..e0b5907 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll @@ -39,7 +39,7 @@ define i64 @foo(i64 %guard, ...) #1 { ; Only 56 bytes of the register save area is copied, because of ; "use-soft-float". 
-; CHECK: call void @llvm.va_start(ptr %vl) +; CHECK: call void @llvm.va_start.p0(ptr %vl) ; CHECK: [[VlAddr:%.*]] = ptrtoint ptr %vl to i64 ; CHECK: [[RegSaveAreaAddrAddr:%.*]] = add i64 [[VlAddr]], 24 ; CHECK: [[RegSaveAreaAddr:%.*]] = inttoptr i64 [[RegSaveAreaAddrAddr]] to ptr diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll index aff4d2c..2051015 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll @@ -560,7 +560,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -580,7 +580,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -623,7 +623,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: 
call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -643,7 +643,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -678,7 +678,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -698,7 +698,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr 
nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -733,7 +733,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -753,7 +753,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -788,7 +788,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, i32 no ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -808,7 +808,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, 
i32 no ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -843,7 +843,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -863,7 +863,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -898,7 +898,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0, ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void 
@llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -918,7 +918,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0, ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -953,7 +953,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coer ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -973,7 +973,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coer ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; 
CHECK-NEXT: ret void ; @@ -1008,7 +1008,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%s ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1028,7 +1028,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%s ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1063,7 +1063,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerc ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1083,7 +1083,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerc ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 
[[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1118,7 +1118,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byv ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1138,7 +1138,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byv ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; @@ -1173,7 +1173,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byv ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) -; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) 
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr @@ -1193,7 +1193,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byv ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) ; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) -; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll index 21f3311..f07f3ad 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll @@ -542,7 +542,7 @@ define void @VAStart(i32 %x, ...) 
sanitize_memory { ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP27]], 17592186044416, !dbg [[DBG11]] ; CHECK-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr, !dbg [[DBG11]] ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 24, i1 false), !dbg [[DBG11]] -; CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]), !dbg [[DBG11]] +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]), !dbg [[DBG11]] ; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[VA]] to i64, !dbg [[DBG11]] ; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], 16, !dbg [[DBG11]] ; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr, !dbg [[DBG11]] diff --git a/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s b/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s new file mode 100644 index 0000000..4623500 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s @@ -0,0 +1,27 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// OBJDUMP: 0000 00000000 0f000000 00000000 00000000 + +.text + +.p2align 8 +.type caller,@function +caller: + s_endpgm + +.rodata + +.p2align 6 +.amdhsa_kernel caller + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_private_segment_fixed_size max(7, callee1.private_seg_size, callee2.private_seg_size) +.end_amdhsa_kernel + +.set callee1.private_seg_size, 4 +.set callee2.private_seg_size, 15 + +// ASM: .amdhsa_private_segment_fixed_size max(7, callee1.private_seg_size, callee2.private_seg_size) diff --git a/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s new file mode 100644 index 0000000..fab3e89 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s @@ -0,0 +1,281 @@ +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=ASM %s + +// Some expression 
currently require (immediately) solvable expressions, i.e., +// they don't depend on yet-unknown symbolic values. + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type user_sgpr_count,@function +user_sgpr_count: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_count + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_count defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_count + +.p2align 8 +.type user_sgpr_private_segment_buffer,@function +user_sgpr_private_segment_buffer: + s_endpgm + +.amdhsa_kernel user_sgpr_private_segment_buffer + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_private_segment_buffer defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer + +.p2align 8 +.type user_sgpr_kernarg_preload_length,@function +user_sgpr_kernarg_preload_length: + s_endpgm + +.amdhsa_kernel user_sgpr_kernarg_preload_length + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_kernarg_preload_length defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length defined_boolean + +.p2align 8 +.type user_sgpr_kernarg_preload_offset,@function +user_sgpr_kernarg_preload_offset: + s_endpgm + +.amdhsa_kernel user_sgpr_kernarg_preload_offset + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean + +.p2align 8 +.type user_sgpr_dispatch_ptr,@function +user_sgpr_dispatch_ptr: + 
s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_dispatch_ptr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_dispatch_ptr defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr + +.p2align 8 +.type user_sgpr_queue_ptr,@function +user_sgpr_queue_ptr: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_queue_ptr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_queue_ptr defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr + +.p2align 8 +.type user_sgpr_kernarg_segment_ptr,@function +user_sgpr_kernarg_segment_ptr: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_kernarg_segment_ptr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_kernarg_segment_ptr defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr + +.p2align 8 +.type user_sgpr_dispatch_id,@function +user_sgpr_dispatch_id: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_dispatch_id + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_dispatch_id defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id + +.p2align 8 +.type user_sgpr_flat_scratch_init,@function +user_sgpr_flat_scratch_init: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_flat_scratch_init + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_flat_scratch_init defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init + +.p2align 8 +.type 
user_sgpr_private_segment_size,@function +user_sgpr_private_segment_size: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_private_segment_size + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_private_segment_size defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size + +.p2align 8 +.type wavefront_size32,@function +wavefront_size32: + s_endpgm + +.p2align 6 +.amdhsa_kernel wavefront_size32 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_wavefront_size32 defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_wavefront_size32 + +.p2align 8 +.type next_free_vgpr,@function +next_free_vgpr: + s_endpgm + +.p2align 6 +.amdhsa_kernel next_free_vgpr + .amdhsa_next_free_vgpr defined_boolean + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_next_free_vgpr + +.p2align 8 +.type next_free_sgpr,@function +next_free_sgpr: + s_endpgm + +.p2align 6 +.amdhsa_kernel next_free_sgpr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr defined_boolean + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_next_free_sgpr + +.p2align 8 +.type accum_offset,@function +accum_offset: + s_endpgm + +.p2align 6 +.amdhsa_kernel accum_offset + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_accum_offset + +.p2align 8 +.type reserve_vcc,@function +reserve_vcc: + s_endpgm + +.p2align 6 +.amdhsa_kernel reserve_vcc + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_reserve_vcc defined_boolean 
+.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_reserve_vcc + +.p2align 8 +.type reserve_flat_scratch,@function +reserve_flat_scratch: + s_endpgm + +.p2align 6 +.amdhsa_kernel reserve_flat_scratch + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_reserve_flat_scratch defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_reserve_flat_scratch + +.p2align 8 +.type shared_vgpr_count,@function +shared_vgpr_count: + s_endpgm + +.p2align 6 +.amdhsa_kernel shared_vgpr_count + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_shared_vgpr_count defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_shared_vgpr_count + +.set defined_boolean, 1 + +// ASM: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s new file mode 100644 index 0000000..95af59c --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s @@ -0,0 +1,190 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups), + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0afe4 801f007f 000c0000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0afe4 801f007f 000c0000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact 
defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1)>>0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&32)>>5 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: 
.amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: 
.amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s new file mode 100644 index 0000000..e1107fb --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s @@ -0,0 +1,186 @@ +// RUN: llvm-mc -triple 
amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). +// When going from asm -> obj, the expressions should get resolved (through fixups), + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0afe4 811f007f 000c0000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0afe4 811f007f 000c0000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress 
defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: 
.amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 +// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: 
.amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: 
.amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_enable_private_segment 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s new file mode 100644 index 0000000..449616d --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s @@ -0,0 +1,184 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s +// 
RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). +// When going from asm -> obj, the expressions should get resolved (through fixups), + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f02fe4 811f007f 000c0000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f02fe4 811f007f 000c0000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + 
.amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_round_robin_scheduling defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_round_robin_scheduling defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later 
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 +// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_enable_private_segment 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: 
.amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_round_robin_scheduling 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s new file mode 100644 index 0000000..c7e0544 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s @@ -0,0 +1,168 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups), + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 
+.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 
(((0&(~2048))|(defined_boolean<<11))&1)>>0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s new file mode 100644 index 0000000..49a5015 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s @@ -0,0 +1,171 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefix=ASM %s + +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups), + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 
+.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 
(((0&(~2048))|(defined_boolean<<11))&1)>>0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s new file mode 100644 index 0000000..b7f8923 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s @@ -0,0 +1,148 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups), + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100 +// OBJDUMP-NEXT: 0030 0000ac04 81000000 00000000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 +// OBJDUMP-NEXT: 0070 0000ac04 81000000 00000000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean + .amdhsa_dx10_clamp defined_boolean + .amdhsa_ieee_mode defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_tg_split defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean + .amdhsa_dx10_clamp defined_boolean + .amdhsa_ieee_mode defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_tg_split defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: 
.amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&63)>>0)+1)*4 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// 
ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_tg_split (((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&65536)>>16 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_underflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_accum_offset 4 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 0 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_tg_split 1 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_invalid_op 0 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +// ASM-NEXT: .amdhsa_exception_int_div_zero 0 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s new file mode 100644 index 0000000..5a4d3e2 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// OBJDUMP: Contents of section .rodata +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100 +// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 + +.text +// ASM: .text + +.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type minimal,@function +minimal: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel minimal + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel minimal +// ASM-NEXT: .amdhsa_group_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// 
ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_accum_offset 4 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 0 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 0 +// ASM-NEXT: .amdhsa_tg_split 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +// ASM-NEXT: .amdhsa_exception_int_div_zero 0 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll index 1882107..4c5a448 100644 --- a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll +++ b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll @@ -23,7 +23,7 @@ define internal i32 @i(ptr inalloca(ptr) %a, ...) { ; CHECK-LABEL: define {{[^@]+}}@i ; CHECK-SAME: (ptr inalloca(ptr) [[A:%.*]], ...) 
unnamed_addr { ; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4 -; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]]) ; CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[AP]], align 4 ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 ; CHECK-NEXT: ret i32 [[L]] diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll index ef365d6..38dfd25 100644 --- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -17,10 +17,10 @@ define i32 @func1(i32 %a, double %b, ptr %v, ...) nounwind { ; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], ptr [[A_ADDR]], double [[B:%.*]], ptr [[B_ADDR]]) -; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(ptr [[V:%.*]], ptr [[AP]]) -; CHECK-NEXT: call void @llvm.va_end(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[V:%.*]], ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[AP]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]]) ; CHECK-NEXT: call void @outlined_ir_func_1(i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]]) ; CHECK-NEXT: [[TMP_RELOAD:%.*]] = load i32, ptr [[TMP_LOC]], align 4 @@ -52,10 +52,10 @@ define i32 @func2(i32 %a, double %b, ptr %v, ...) 
nounwind { ; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], ptr [[A_ADDR]], double [[B:%.*]], ptr [[B_ADDR]]) -; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(ptr [[V:%.*]], ptr [[AP]]) -; CHECK-NEXT: call void @llvm.va_end(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[V:%.*]], ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[AP]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]]) ; CHECK-NEXT: call void @outlined_ir_func_1(i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]]) ; CHECK-NEXT: [[TMP_RELOAD:%.*]] = load i32, ptr [[TMP_LOC]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll index 9f565de..2d52608 100644 --- a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll +++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll @@ -51,7 +51,7 @@ entry: ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]]) ; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[V]], ptr [[AP]], i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]]) @@ -70,7 +70,7 @@ entry: ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32 
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]]) ; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[V]], ptr [[AP]], i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]]) @@ -84,8 +84,8 @@ entry: ; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] ; CHECK: entry_to_outline: -; CHECK-NEXT: call void @llvm.va_copy(ptr [[TMP0]], ptr [[TMP1]]) -; CHECK-NEXT: call void @llvm.va_end(ptr [[TMP1]]) +; CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[TMP0]], ptr [[TMP1]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[TMP1]]) ; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index 50b0e7a..2f264a2 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -141,4 +141,4 @@ attributes #1 = { argmemonly nounwind } !5 = distinct !{!5, !"some domain"} !6 = !{!7} !7 = distinct !{!7, !5, !"some scope 2"} -!8 = !{i64 0, i64 8, null} +!8 = !{i64 0, i64 8, !0} diff --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll index 43e34c8..6c0575e 100644 --- a/llvm/test/Transforms/InstCombine/powi.ll +++ b/llvm/test/Transforms/InstCombine/powi.ll @@ -313,7 +313,7 @@ define double @fdiv_pow_powi(double %x) { ; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc nnan double [[X:%.*]], [[X]] ; CHECK-NEXT: ret double [[DIV]] ; - %p1 = call double @llvm.powi.f64.i32(double %x, i32 3) + %p1 = call reassoc double @llvm.powi.f64.i32(double %x, i32 3) %div = fdiv reassoc nnan double %p1, %x ret double %div } @@ -323,7 +323,7 @@ define float @fdiv_powf_powi(float %x) { ; CHECK-NEXT: [[DIV:%.*]] = call reassoc nnan float @llvm.powi.f32.i32(float [[X:%.*]], i32 99) ; CHECK-NEXT: ret float 
[[DIV]] ; - %p1 = call float @llvm.powi.f32.i32(float %x, i32 100) + %p1 = call reassoc float @llvm.powi.f32.i32(float %x, i32 100) %div = fdiv reassoc nnan float %p1, %x ret float %div } @@ -347,10 +347,21 @@ define double @fdiv_pow_powi_multi_use(double %x) { define float @fdiv_powf_powi_missing_reassoc(float %x) { ; CHECK-LABEL: @fdiv_powf_powi_missing_reassoc( ; CHECK-NEXT: [[P1:%.*]] = call float @llvm.powi.f32.i32(float [[X:%.*]], i32 100) -; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float [[P1]], [[X]] +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc nnan float [[P1]], [[X]] ; CHECK-NEXT: ret float [[DIV]] ; %p1 = call float @llvm.powi.f32.i32(float %x, i32 100) + %div = fdiv reassoc nnan float %p1, %x + ret float %div +} + +define float @fdiv_powf_powi_missing_reassoc1(float %x) { +; CHECK-LABEL: @fdiv_powf_powi_missing_reassoc1( +; CHECK-NEXT: [[P1:%.*]] = call reassoc float @llvm.powi.f32.i32(float [[X:%.*]], i32 100) +; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float [[P1]], [[X]] +; CHECK-NEXT: ret float [[DIV]] +; + %p1 = call reassoc float @llvm.powi.f32.i32(float %x, i32 100) %div = fdiv nnan float %p1, %x ret float %div } diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll index 996d2c0..d079c03 100644 --- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll +++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll @@ -75,7 +75,7 @@ entry: !1 = !{!"omnipotent char", !0} !2 = !{!5, !5, i64 0} !3 = !{i64 0, i64 4, !2} -!4 = !{i64 0, i64 8, null} +!4 = !{i64 0, i64 8, !2} !5 = !{!"float", !0} !6 = !{i64 0, i64 4, !2, i64 4, i64 4, !2} !7 = !{i64 0, i64 2, !2, i64 4, i64 6, !2} diff --git a/llvm/test/Transforms/NewGVN/pr31483.ll b/llvm/test/Transforms/NewGVN/pr31483.ll index 0e7461c..82e9a2a 100644 --- a/llvm/test/Transforms/NewGVN/pr31483.ll +++ b/llvm/test/Transforms/NewGVN/pr31483.ll @@ -41,7 +41,7 @@ define signext i32 @ham(ptr %arg, ptr %arg1) #0 { ; CHECK: bb22: ; CHECK-NEXT: br label 
[[BB2]] ; CHECK: bb23: -; CHECK-NEXT: call void @llvm.va_end(ptr [[TMP]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[TMP]]) ; CHECK-NEXT: ret i32 undef ; bb: diff --git a/llvm/test/Transforms/Reassociate/vaarg_movable.ll b/llvm/test/Transforms/Reassociate/vaarg_movable.ll index 337877a..4e45b21 100644 --- a/llvm/test/Transforms/Reassociate/vaarg_movable.ll +++ b/llvm/test/Transforms/Reassociate/vaarg_movable.ll @@ -10,13 +10,13 @@ define i32 @func(i32 %dummy, ...) { ; ; CHECK-LABEL: @func( ; CHECK-NEXT: [[VARARGS:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: call void @llvm.va_start(ptr [[VARARGS]]) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VARARGS]]) ; CHECK-NEXT: [[V0:%.*]] = va_arg ptr [[VARARGS]], i32 ; CHECK-NEXT: [[V1:%.*]] = va_arg ptr [[VARARGS]], i32 ; CHECK-NEXT: [[V0_NEG:%.*]] = sub i32 0, [[V0]] ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[V0_NEG]], 1 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[V1]] -; CHECK-NEXT: call void @llvm.va_end(ptr [[VARARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VARARGS]]) ; CHECK-NEXT: ret i32 [[ADD]] ; %varargs = alloca ptr, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll new file mode 100644 index 0000000..f376ca7 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[B:%.*]], ptr [[C:%.*]], i32 [[TMP0:%.*]], ptr [[A:%.*]], i1 [[TOBOOL3_NOT:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[TOBOOL3_NOT]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 +; 
CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16> +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16> +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i1> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL3_NOT]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], <i32 16, i32 16, i32 16, i32 16> +; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16> +; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> +; CHECK-NEXT: br i1 true, label [[BB3]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i16> [ [[TMP5]], [[BB1]] ], [ [[TMP15]], [[BB2]] ] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i16> [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[B]], align 16 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i16> [[TMP16]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = sext i16 [[TMP19]] to i32 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[A]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i16> [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 +; CHECK-NEXT: store i32 [[TMP22]], ptr [[C]], align 16 +; CHECK-NEXT: [[TMP23:%.*]] 
= extractelement <4 x i16> [[TMP16]], i32 3 +; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 +; CHECK-NEXT: store i32 [[TMP24]], ptr [[B]], align 8 +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %tobool3.not, label %bb1, label %bb2 + +bb1: + %conv1.i.us = ashr i32 %0, 16 + %cmp2.i.us = icmp slt i32 %conv1.i.us, %0 + %sext26.us = zext i1 %cmp2.i.us to i32 + %conv1.i.us.5 = ashr i32 %0, 16 + %cmp2.i.us.5 = icmp slt i32 %conv1.i.us.5, %0 + %sext26.us.5 = zext i1 %cmp2.i.us.5 to i32 + %conv1.i.us.6 = ashr i32 %0, 16 + %cmp2.i.us.6 = icmp slt i32 %conv1.i.us.6, %0 + %sext26.us.6 = zext i1 %cmp2.i.us.6 to i32 + %conv1.i.us.7 = ashr i32 %0, 16 + %cmp2.i.us.7 = icmp slt i32 %conv1.i.us.7, %0 + %sext26.us.7 = zext i1 %cmp2.i.us.7 to i32 + br label %bb3 + +bb2: + %cmp2.i = icmp sgt i32 %0, 0 + %1 = zext i1 %cmp2.i to i32 + %cond.i = select i1 %tobool3.not, i32 %0, i32 %1 + %sext26 = shl i32 %cond.i, 16 + %conv13 = ashr i32 %sext26, 16 + %cmp2.i.5 = icmp sgt i32 %0, 0 + %2 = zext i1 %cmp2.i.5 to i32 + %cond.i.5 = select i1 %tobool3.not, i32 %0, i32 %2 + %sext26.5 = shl i32 %cond.i.5, 16 + %conv13.5 = ashr i32 %sext26.5, 16 + %cmp2.i.6 = icmp sgt i32 %0, 0 + %3 = zext i1 %cmp2.i.6 to i32 + %cond.i.6 = select i1 %tobool3.not, i32 %0, i32 %3 + %sext26.6 = shl i32 %cond.i.6, 16 + %conv13.6 = ashr i32 %sext26.6, 16 + %cmp2.i.7 = icmp sgt i32 %0, 0 + %4 = zext i1 %cmp2.i.7 to i32 + %cond.i.7 = select i1 %tobool3.not, i32 %0, i32 %4 + %sext26.7 = shl i32 %cond.i.7, 16 + %conv13.7 = ashr i32 %sext26.7, 16 + br i1 true, label %bb3, label %bb2 + +bb3: + %conv13p = phi i32 [ %sext26.us, %bb1 ], [ %conv13, %bb2 ] + %conv13.5p = phi i32 [ %sext26.us.5, %bb1 ], [ %conv13.5, %bb2 ] + %conv13.6p = phi i32 [ %sext26.us.6, %bb1 ], [ %conv13.6, %bb2 ] + %conv13.7p = phi i32 [ %sext26.us.7, %bb1 ], [ %conv13.7, %bb2 ] + store i32 %conv13p, ptr %b, align 16 + store i32 %conv13.5p, ptr %a, align 8 + store i32 %conv13.6p, ptr %c, align 16 + store i32 %conv13.7p, ptr %b, align 8 + ret i32 0 +} 
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 0fcd787..61034de 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -539,7 +539,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !6 = !{!5, !5, i64 0} !7 = !{i64 0, i64 8, !6, i64 8, i64 4, !1} !8 = !{i64 0, i64 4, !1, i64 4, i64 8, !6} -!9 = !{i64 0, i64 8, !6, i64 4, i64 8, !1} +!9 = !{i64 0, i64 8, !6, i64 8, i64 8, !1} !10 = !{i64 0, i64 2, !1, i64 2, i64 2, !1} !11 = !{i64 0, i64 1, !1, i64 1, i64 3, !1} !12 = !{i64 0, i64 2, !1, i64 2, i64 6, !1} diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll index bbcdcb6..73ae66d 100644 --- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll @@ -836,5 +836,6 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { !2 = !{ !"set2", !0 } !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} } !4 = !{ float 4.0 } -!5 = !{ i64 0, i64 8, null } +!5 = !{ i64 0, i64 8, !6 } +!6 = !{ !1, !1, i64 0 } !13 = distinct !{} diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll index db7c5f5..87a70cc 100644 --- a/llvm/test/Transforms/Scalarizer/basic.ll +++ b/llvm/test/Transforms/Scalarizer/basic.ll @@ -870,5 +870,6 @@ define <2 x float> @f25(<2 x float> %src) { !2 = !{ !"set2", !0 } !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} } !4 = !{ float 4.0 } -!5 = !{ i64 0, i64 8, null } +!5 = !{ i64 0, i64 8, !6 } +!6 = !{ !1, !1, i64 0 } !13 = distinct !{} diff --git a/llvm/test/Verifier/tbaa-struct.ll b/llvm/test/Verifier/tbaa-struct.ll index b8ddc7c..14c19a1 100644 --- a/llvm/test/Verifier/tbaa-struct.ll +++ b/llvm/test/Verifier/tbaa-struct.ll @@ -1,28 +1,36 @@ -; RUN: llvm-as < %s 2>&1 - -; FIXME: The verifer should reject the invalid !tbaa.struct nodes below. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s define void @test_overlapping_regions(ptr %a1) { +; CHECK: Overlapping tbaa.struct regions +; CHECK-NEXT: %ld = load i8, ptr %a1, align 1, !tbaa.struct !0 %ld = load i8, ptr %a1, align 1, !tbaa.struct !0 ret void } define void @test_size_not_integer(ptr %a1) { +; CHECK: Size must be a constant integer +; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !5 store i8 1, ptr %a1, align 1, !tbaa.struct !5 ret void } define void @test_offset_not_integer(ptr %a1, ptr %a2) { +; CHECK: Offset must be a constant integer +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6 ret void } define void @test_tbaa_missing(ptr %a1, ptr %a2) { +; CHECK: TBAA tag missing +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7 ret void } define void @test_tbaa_invalid(ptr %a1) { +; CHECK: Old-style TBAA is no longer allowed, use struct-path TBAA instead +; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !8 store i8 1, ptr %a1, align 1, !tbaa.struct !8 ret void } diff --git a/llvm/test/tools/llvm-lib/arm64ec-implib.test b/llvm/test/tools/llvm-lib/arm64ec-implib.test index 9ce53fe..e9987d0 100644 --- a/llvm/test/tools/llvm-lib/arm64ec-implib.test +++ b/llvm/test/tools/llvm-lib/arm64ec-implib.test @@ -14,6 +14,8 @@ ARMAP-NEXT: Archive EC map ARMAP-NEXT: #expname in test.dll ARMAP-NEXT: #funcexp in test.dll ARMAP-NEXT: #mangledfunc in test.dll +ARMAP-NEXT: #manglednonamefunc in test.dll +ARMAP-NEXT: #nonamefunc in test.dll ARMAP-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test.dll ARMAP-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll ARMAP-NEXT: __IMPORT_DESCRIPTOR_test in test.dll @@ -23,13 +25,19 @@ 
ARMAP-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test.dll ARMAP-NEXT: __imp_aux_expname in test.dll ARMAP-NEXT: __imp_aux_funcexp in test.dll ARMAP-NEXT: __imp_aux_mangledfunc in test.dll +ARMAP-NEXT: __imp_aux_manglednonamefunc in test.dll +ARMAP-NEXT: __imp_aux_nonamefunc in test.dll ARMAP-NEXT: __imp_dataexp in test.dll ARMAP-NEXT: __imp_expname in test.dll ARMAP-NEXT: __imp_funcexp in test.dll ARMAP-NEXT: __imp_mangledfunc in test.dll +ARMAP-NEXT: __imp_manglednonamefunc in test.dll +ARMAP-NEXT: __imp_nonamefunc in test.dll ARMAP-NEXT: expname in test.dll ARMAP-NEXT: funcexp in test.dll ARMAP-NEXT: mangledfunc in test.dll +ARMAP-NEXT: manglednonamefunc in test.dll +ARMAP-NEXT: nonamefunc in test.dll ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll RUN: llvm-readobj test.lib | FileCheck -check-prefix=READOBJ %s @@ -95,6 +103,25 @@ READOBJ-NEXT: Type: data READOBJ-NEXT: Name type: name READOBJ-NEXT: Export name: dataexp READOBJ-NEXT: Symbol: __imp_dataexp +READOBJ-EMPTY: +READOBJ-NEXT: File: test.dll +READOBJ-NEXT: Format: COFF-import-file-ARM64EC +READOBJ-NEXT: Type: code +READOBJ-NEXT: Name type: ordinal +READOBJ-NEXT: Symbol: __imp_nonamefunc +READOBJ-NEXT: Symbol: nonamefunc +READOBJ-NEXT: Symbol: __imp_aux_nonamefunc +READOBJ-NEXT: Symbol: #nonamefunc +READOBJ-EMPTY: +READOBJ-NEXT: File: test.dll +READOBJ-NEXT: Format: COFF-import-file-ARM64EC +READOBJ-NEXT: Type: code +READOBJ-NEXT: Name type: ordinal +READOBJ-NEXT: Symbol: __imp_manglednonamefunc +READOBJ-NEXT: Symbol: manglednonamefunc +READOBJ-NEXT: Symbol: __imp_aux_manglednonamefunc +READOBJ-NEXT: Symbol: #manglednonamefunc + Using -machine:arm64x gives the same output. 
RUN: llvm-lib -machine:arm64x -def:test.def -out:testx.lib @@ -112,22 +139,28 @@ RUN: llvm-nm --print-armap testx.lib | FileCheck -check-prefix=ARMAPX %s ARMAPX: Archive map ARMAPX-NEXT: #mangledfunc in test.dll +ARMAPX-NEXT: #manglednonamefunc in test.dll ARMAPX-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll ARMAPX-NEXT: __IMPORT_DESCRIPTOR_test in test.dll ARMAPX-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll ARMAPX-NEXT: __imp_#mangledfunc in test.dll +ARMAPX-NEXT: __imp_#manglednonamefunc in test.dll ARMAPX-NEXT: __imp_?test_cpp_func@@YAHPEAX@Z in test.dll ARMAPX-NEXT: __imp_dataexp in test.dll ARMAPX-NEXT: __imp_expname in test.dll ARMAPX-NEXT: __imp_funcexp in test.dll +ARMAPX-NEXT: __imp_nonamefunc in test.dll ARMAPX-NEXT: expname in test.dll ARMAPX-NEXT: funcexp in test.dll +ARMAPX-NEXT: nonamefunc in test.dll ARMAPX-NEXT: test_NULL_THUNK_DATA in test.dll ARMAPX-EMPTY: ARMAPX-NEXT: Archive EC map ARMAPX-NEXT: #expname in test.dll ARMAPX-NEXT: #funcexp in test.dll ARMAPX-NEXT: #mangledfunc in test.dll +ARMAPX-NEXT: #manglednonamefunc in test.dll +ARMAPX-NEXT: #nonamefunc in test.dll ARMAPX-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test.dll ARMAPX-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll ARMAPX-NEXT: __IMPORT_DESCRIPTOR_test in test.dll @@ -137,13 +170,19 @@ ARMAPX-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test.dll ARMAPX-NEXT: __imp_aux_expname in test.dll ARMAPX-NEXT: __imp_aux_funcexp in test.dll ARMAPX-NEXT: __imp_aux_mangledfunc in test.dll +ARMAPX-NEXT: __imp_aux_manglednonamefunc in test.dll +ARMAPX-NEXT: __imp_aux_nonamefunc in test.dll ARMAPX-NEXT: __imp_dataexp in test.dll ARMAPX-NEXT: __imp_expname in test.dll ARMAPX-NEXT: __imp_funcexp in test.dll ARMAPX-NEXT: __imp_mangledfunc in test.dll +ARMAPX-NEXT: __imp_manglednonamefunc in test.dll +ARMAPX-NEXT: __imp_nonamefunc in test.dll ARMAPX-NEXT: expname in test.dll ARMAPX-NEXT: funcexp in test.dll ARMAPX-NEXT: mangledfunc in test.dll +ARMAPX-NEXT: manglednonamefunc in test.dll +ARMAPX-NEXT: nonamefunc in 
test.dll ARMAPX-NEXT: test_NULL_THUNK_DATA in test.dll RUN: llvm-readobj testx.lib | FileCheck -check-prefix=READOBJX %s @@ -211,6 +250,24 @@ READOBJX-NEXT: Export name: dataexp READOBJX-NEXT: Symbol: __imp_dataexp READOBJX-EMPTY: READOBJX-NEXT: File: test.dll +READOBJX-NEXT: Format: COFF-import-file-ARM64EC +READOBJX-NEXT: Type: code +READOBJX-NEXT: Name type: ordinal +READOBJX-NEXT: Symbol: __imp_nonamefunc +READOBJX-NEXT: Symbol: nonamefunc +READOBJX-NEXT: Symbol: __imp_aux_nonamefunc +READOBJX-NEXT: Symbol: #nonamefunc +READOBJX-EMPTY: +READOBJX-NEXT: File: test.dll +READOBJX-NEXT: Format: COFF-import-file-ARM64EC +READOBJX-NEXT: Type: code +READOBJX-NEXT: Name type: ordinal +READOBJX-NEXT: Symbol: __imp_manglednonamefunc +READOBJX-NEXT: Symbol: manglednonamefunc +READOBJX-NEXT: Symbol: __imp_aux_manglednonamefunc +READOBJX-NEXT: Symbol: #manglednonamefunc +READOBJX-EMPTY: +READOBJX-NEXT: File: test.dll READOBJX-NEXT: Format: COFF-import-file-ARM64 READOBJX-NEXT: Type: code READOBJX-NEXT: Name type: name @@ -248,6 +305,20 @@ READOBJX-NEXT: Type: data READOBJX-NEXT: Name type: name READOBJX-NEXT: Export name: dataexp READOBJX-NEXT: Symbol: __imp_dataexp +READOBJX-EMPTY: +READOBJX-NEXT: File: test.dll +READOBJX-NEXT: Format: COFF-import-file-ARM64 +READOBJX-NEXT: Type: code +READOBJX-NEXT: Name type: ordinal +READOBJX-NEXT: Symbol: __imp_nonamefunc +READOBJX-NEXT: Symbol: nonamefunc +READOBJX-EMPTY: +READOBJX-NEXT: File: test.dll +READOBJX-NEXT: Format: COFF-import-file-ARM64 +READOBJX-NEXT: Type: code +READOBJX-NEXT: Name type: ordinal +READOBJX-NEXT: Symbol: __imp_#manglednonamefunc +READOBJX-NEXT: Symbol: #manglednonamefunc RUN: llvm-lib -machine:arm64ec -def:test.def -defArm64Native:test2.def -out:test2.lib @@ -266,6 +337,8 @@ ARMAPX2-NEXT: Archive EC map ARMAPX2-NEXT: #expname in test2.dll ARMAPX2-NEXT: #funcexp in test2.dll ARMAPX2-NEXT: #mangledfunc in test2.dll +ARMAPX2-NEXT: #manglednonamefunc in test2.dll +ARMAPX2-NEXT: #nonamefunc in test2.dll 
ARMAPX2-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test2.dll ARMAPX2-NEXT: ?test_cpp_func@@YAHPEAX@Z in test2.dll ARMAPX2-NEXT: __IMPORT_DESCRIPTOR_test2 in test2.dll @@ -275,13 +348,19 @@ ARMAPX2-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test2.dll ARMAPX2-NEXT: __imp_aux_expname in test2.dll ARMAPX2-NEXT: __imp_aux_funcexp in test2.dll ARMAPX2-NEXT: __imp_aux_mangledfunc in test2.dll +ARMAPX2-NEXT: __imp_aux_manglednonamefunc in test2.dll +ARMAPX2-NEXT: __imp_aux_nonamefunc in test2.dll ARMAPX2-NEXT: __imp_dataexp in test2.dll ARMAPX2-NEXT: __imp_expname in test2.dll ARMAPX2-NEXT: __imp_funcexp in test2.dll ARMAPX2-NEXT: __imp_mangledfunc in test2.dll +ARMAPX2-NEXT: __imp_manglednonamefunc in test2.dll +ARMAPX2-NEXT: __imp_nonamefunc in test2.dll ARMAPX2-NEXT: expname in test2.dll ARMAPX2-NEXT: funcexp in test2.dll ARMAPX2-NEXT: mangledfunc in test2.dll +ARMAPX2-NEXT: manglednonamefunc in test2.dll +ARMAPX2-NEXT: nonamefunc in test2.dll ARMAPX2-NEXT: test2_NULL_THUNK_DATA in test2.dll ARMAPX2: test2.dll: @@ -312,6 +391,18 @@ ARMAPX2-NEXT: test2.dll: ARMAPX2-NEXT: 00000000 D __imp_dataexp ARMAPX2-EMPTY: ARMAPX2-NEXT: test2.dll: +ARMAPX2-NEXT: 00000000 T #nonamefunc +ARMAPX2-NEXT: 00000000 T __imp_aux_nonamefunc +ARMAPX2-NEXT: 00000000 T __imp_nonamefunc +ARMAPX2-NEXT: 00000000 T nonamefunc +ARMAPX2-EMPTY: +ARMAPX2-NEXT: test2.dll: +ARMAPX2-NEXT: 00000000 T #manglednonamefunc +ARMAPX2-NEXT: 00000000 T __imp_aux_manglednonamefunc +ARMAPX2-NEXT: 00000000 T __imp_manglednonamefunc +ARMAPX2-NEXT: 00000000 T manglednonamefunc +ARMAPX2-EMPTY: +ARMAPX2-NEXT: test2.dll: ARMAPX2-NEXT: 00000000 T __imp_otherfunc ARMAPX2-NEXT: 00000000 T otherfunc @@ -406,6 +497,8 @@ EXPORTS ?test_cpp_func@@YAHPEAX@Z expname=impname dataexp DATA + nonamefunc @1 NONAME + #manglednonamefunc @2 NONAME #--- test2.def LIBRARY test2.dll diff --git a/llvm/unittests/Object/GOFFObjectFileTest.cpp b/llvm/unittests/Object/GOFFObjectFileTest.cpp index 734dac6..69f60d0 100644 --- 
a/llvm/unittests/Object/GOFFObjectFileTest.cpp +++ b/llvm/unittests/Object/GOFFObjectFileTest.cpp @@ -502,3 +502,100 @@ TEST(GOFFObjectFileTest, InvalidERSymbolType) { FailedWithMessage("ESD record 1 has unknown Executable type 0x03")); } } + +TEST(GOFFObjectFileTest, TXTConstruct) { + char GOFFData[GOFF::RecordLength * 6] = {}; + + // HDR record. + GOFFData[0] = 0x03; + GOFFData[1] = 0xF0; + GOFFData[50] = 0x01; + + // ESD record. + GOFFData[GOFF::RecordLength] = 0x03; + GOFFData[GOFF::RecordLength + 7] = 0x01; // ESDID. + GOFFData[GOFF::RecordLength + 71] = 0x05; // Size of symbol name. + GOFFData[GOFF::RecordLength + 72] = 0xa5; // Symbol name is v. + GOFFData[GOFF::RecordLength + 73] = 0x81; // Symbol name is a. + GOFFData[GOFF::RecordLength + 74] = 0x99; // Symbol name is r. + GOFFData[GOFF::RecordLength + 75] = 0x7b; // Symbol name is #. + GOFFData[GOFF::RecordLength + 76] = 0x83; // Symbol name is c. + + // ESD record. + GOFFData[GOFF::RecordLength * 2] = 0x03; + GOFFData[GOFF::RecordLength * 2 + 3] = 0x01; + GOFFData[GOFF::RecordLength * 2 + 7] = 0x02; // ESDID. + GOFFData[GOFF::RecordLength * 2 + 11] = 0x01; // Parent ESDID. + GOFFData[GOFF::RecordLength * 2 + 27] = 0x08; // Length. + GOFFData[GOFF::RecordLength * 2 + 40] = 0x01; // Name Space ID. + GOFFData[GOFF::RecordLength * 2 + 41] = 0x80; + GOFFData[GOFF::RecordLength * 2 + 60] = 0x04; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 61] = 0x04; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 63] = 0x0a; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 66] = 0x03; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 71] = 0x08; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 72] = 0xc3; // Symbol name is c. + GOFFData[GOFF::RecordLength * 2 + 73] = 0x6d; // Symbol name is _. + GOFFData[GOFF::RecordLength * 2 + 74] = 0xc3; // Symbol name is c. + GOFFData[GOFF::RecordLength * 2 + 75] = 0xd6; // Symbol name is o. 
+ GOFFData[GOFF::RecordLength * 2 + 76] = 0xc4; // Symbol name is D. + GOFFData[GOFF::RecordLength * 2 + 77] = 0xc5; // Symbol name is E. + GOFFData[GOFF::RecordLength * 2 + 78] = 0xf6; // Symbol name is 6. + GOFFData[GOFF::RecordLength * 2 + 79] = 0xf4; // Symbol name is 4. + + // ESD record. + GOFFData[GOFF::RecordLength * 3] = 0x03; + GOFFData[GOFF::RecordLength * 3 + 3] = 0x02; + GOFFData[GOFF::RecordLength * 3 + 7] = 0x03; // ESDID. + GOFFData[GOFF::RecordLength * 3 + 11] = 0x02; // Parent ESDID. + GOFFData[GOFF::RecordLength * 3 + 71] = 0x05; // Size of symbol name. + GOFFData[GOFF::RecordLength * 3 + 72] = 0xa5; // Symbol name is v. + GOFFData[GOFF::RecordLength * 3 + 73] = 0x81; // Symbol name is a. + GOFFData[GOFF::RecordLength * 3 + 74] = 0x99; // Symbol name is r. + GOFFData[GOFF::RecordLength * 3 + 75] = 0x7b; // Symbol name is #. + GOFFData[GOFF::RecordLength * 3 + 76] = 0x83; // Symbol name is c. + + // TXT record. + GOFFData[GOFF::RecordLength * 4] = 0x03; + GOFFData[GOFF::RecordLength * 4 + 1] = 0x10; + GOFFData[GOFF::RecordLength * 4 + 7] = 0x02; + GOFFData[GOFF::RecordLength * 4 + 23] = 0x08; // Data Length. + GOFFData[GOFF::RecordLength * 4 + 24] = 0x12; + GOFFData[GOFF::RecordLength * 4 + 25] = 0x34; + GOFFData[GOFF::RecordLength * 4 + 26] = 0x56; + GOFFData[GOFF::RecordLength * 4 + 27] = 0x78; + GOFFData[GOFF::RecordLength * 4 + 28] = 0x9a; + GOFFData[GOFF::RecordLength * 4 + 29] = 0xbc; + GOFFData[GOFF::RecordLength * 4 + 30] = 0xde; + GOFFData[GOFF::RecordLength * 4 + 31] = 0xf0; + + // END record. 
+ GOFFData[GOFF::RecordLength * 5] = 0x03; + GOFFData[GOFF::RecordLength * 5 + 1] = 0x40; + GOFFData[GOFF::RecordLength * 5 + 11] = 0x06; + + StringRef Data(GOFFData, GOFF::RecordLength * 6); + + Expected<std::unique_ptr<ObjectFile>> GOFFObjOrErr = + object::ObjectFile::createGOFFObjectFile( + MemoryBufferRef(Data, "dummyGOFF")); + + ASSERT_THAT_EXPECTED(GOFFObjOrErr, Succeeded()); + + GOFFObjectFile *GOFFObj = dyn_cast<GOFFObjectFile>((*GOFFObjOrErr).get()); + auto Symbols = GOFFObj->symbols(); + ASSERT_EQ(std::distance(Symbols.begin(), Symbols.end()), 1); + SymbolRef Symbol = *Symbols.begin(); + Expected<StringRef> SymbolNameOrErr = GOFFObj->getSymbolName(Symbol); + ASSERT_THAT_EXPECTED(SymbolNameOrErr, Succeeded()); + StringRef SymbolName = SymbolNameOrErr.get(); + EXPECT_EQ(SymbolName, "var#c"); + + auto Sections = GOFFObj->sections(); + ASSERT_EQ(std::distance(Sections.begin(), Sections.end()), 1); + SectionRef Section = *Sections.begin(); + Expected<StringRef> SectionContent = Section.getContents(); + ASSERT_THAT_EXPECTED(SectionContent, Succeeded()); + StringRef Contents = SectionContent.get(); + EXPECT_EQ(Contents, "\x12\x34\x56\x78\x9a\xbc\xde\xf0"); +} diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index a7d0b16..2c72a72 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -2347,13 +2347,6 @@ AArch64ExtensionDependenciesBaseArchTestParams {}, {"aes", "sha2", "sha3", "sm4"}}, - // +sve implies +f32mm if the base architecture is v8.6A+ or v9.1A+, but - // not earlier architectures. 
- {AArch64::ARMV8_5A, {"sve"}, {"sve"}, {"f32mm"}}, - {AArch64::ARMV9A, {"sve"}, {"sve"}, {"f32mm"}}, - {AArch64::ARMV8_6A, {"sve"}, {"sve", "f32mm"}, {}}, - {AArch64::ARMV9_1A, {"sve"}, {"sve", "f32mm"}, {}}, - // +fp16 implies +fp16fml for v8.4A+, but not v9.0-A+ {AArch64::ARMV8_3A, {"fp16"}, {"fullfp16"}, {"fp16fml"}}, {AArch64::ARMV9A, {"fp16"}, {"fullfp16"}, {"fp16fml"}}, @@ -2520,10 +2513,10 @@ AArch64ExtensionDependenciesBaseCPUTestParams {}}, {"cortex-a520", {}, - {"v9.2a", "bf16", "crc", "dotprod", "f32mm", "flagm", - "fp-armv8", "fullfp16", "fp16fml", "i8mm", "lse", "mte", - "pauth", "perfmon", "predres", "ras", "rcpc", "rdm", - "sb", "neon", "ssbs", "sve", "sve2-bitperm", "sve2"}, + {"v9.2a", "bf16", "crc", "dotprod", "flagm", "fp-armv8", + "fullfp16", "fp16fml", "i8mm", "lse", "mte", "pauth", + "perfmon", "predres", "ras", "rcpc", "rdm", "sb", + "neon", "ssbs", "sve", "sve2-bitperm", "sve2"}, {}}, // Negative modifiers diff --git a/llvm/utils/TableGen/Common/CMakeLists.txt b/llvm/utils/TableGen/Common/CMakeLists.txt index 0440f02..c31ed5a 100644 --- a/llvm/utils/TableGen/Common/CMakeLists.txt +++ b/llvm/utils/TableGen/Common/CMakeLists.txt @@ -12,10 +12,12 @@ set(LLVM_LINK_COMPONENTS add_llvm_library(LLVMTableGenCommon STATIC OBJECT EXCLUDE_FROM_ALL GlobalISel/CodeExpander.cpp + GlobalISel/CombinerUtils.cpp GlobalISel/CXXPredicates.cpp GlobalISel/GlobalISelMatchTable.cpp GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp GlobalISel/MatchDataInfo.cpp + GlobalISel/PatternParser.cpp GlobalISel/Patterns.cpp AsmWriterInst.cpp diff --git a/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp new file mode 100644 index 0000000..37e6306 --- /dev/null +++ b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp @@ -0,0 +1,23 @@ +//===- CombinerUtils.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CombinerUtils.h" +#include "llvm/ADT/StringSet.h" + +namespace llvm { + +StringRef insertStrRef(StringRef S) { + if (S.empty()) + return {}; + + static StringSet<> Pool; + auto [It, Inserted] = Pool.insert(S); + return It->getKey(); +} + +} // namespace llvm diff --git a/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h index 8cb2514..82a64c6 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h +++ b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h @@ -65,6 +65,10 @@ inline const DagInit *getDagWithOperatorOfSubClass(const Init &N, return I; return nullptr; } + +/// Copies a StringRef into a static pool to preserve it. +StringRef insertStrRef(StringRef S); + } // namespace llvm #endif diff --git a/llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp new file mode 100644 index 0000000..1d6c4c7 --- /dev/null +++ b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp @@ -0,0 +1,462 @@ +//===- PatternParser.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Common/GlobalISel/PatternParser.h" +#include "Basic/CodeGenIntrinsics.h" +#include "Common/CodeGenTarget.h" +#include "Common/GlobalISel/CombinerUtils.h" +#include "Common/GlobalISel/Patterns.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/SaveAndRestore.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" + +namespace llvm { +namespace gi { +static constexpr StringLiteral MIFlagsEnumClassName = "MIFlagEnum"; + +namespace { +class PrettyStackTraceParse : public PrettyStackTraceEntry { + const Record &Def; + +public: + PrettyStackTraceParse(const Record &Def) : Def(Def) {} + + void print(raw_ostream &OS) const override { + if (Def.isSubClassOf("GICombineRule")) + OS << "Parsing GICombineRule '" << Def.getName() << '\''; + else if (Def.isSubClassOf(PatFrag::ClassName)) + OS << "Parsing " << PatFrag::ClassName << " '" << Def.getName() << '\''; + else + OS << "Parsing '" << Def.getName() << '\''; + OS << '\n'; + } +}; +} // namespace + +bool PatternParser::parsePatternList( + const DagInit &List, + function_ref<bool(std::unique_ptr<Pattern>)> ParseAction, + StringRef Operator, StringRef AnonPatNamePrefix) { + if (List.getOperatorAsDef(DiagLoc)->getName() != Operator) { + PrintError(DiagLoc, "Expected " + Operator + " operator"); + return false; + } + + if (List.getNumArgs() == 0) { + PrintError(DiagLoc, Operator + " pattern list is empty"); + return false; + } + + // The match section consists of a list of matchers and predicates. Parse each + // one and add the equivalent GIMatchDag nodes, predicates, and edges. + for (unsigned I = 0; I < List.getNumArgs(); ++I) { + Init *Arg = List.getArg(I); + std::string Name = List.getArgName(I) + ? 
List.getArgName(I)->getValue().str() + : ("__" + AnonPatNamePrefix + "_" + Twine(I)).str(); + + if (auto Pat = parseInstructionPattern(*Arg, Name)) { + if (!ParseAction(std::move(Pat))) + return false; + continue; + } + + if (auto Pat = parseWipMatchOpcodeMatcher(*Arg, Name)) { + if (!ParseAction(std::move(Pat))) + return false; + continue; + } + + // Parse arbitrary C++ code + if (const auto *StringI = dyn_cast<StringInit>(Arg)) { + auto CXXPat = std::make_unique<CXXPattern>(*StringI, insertStrRef(Name)); + if (!ParseAction(std::move(CXXPat))) + return false; + continue; + } + + PrintError(DiagLoc, + "Failed to parse pattern: '" + Arg->getAsString() + '\''); + return false; + } + + return true; +} + +static const CodeGenInstruction & +getInstrForIntrinsic(const CodeGenTarget &CGT, const CodeGenIntrinsic *I) { + StringRef Opc; + if (I->isConvergent) { + Opc = I->hasSideEffects ? "G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS" + : "G_INTRINSIC_CONVERGENT"; + } else { + Opc = I->hasSideEffects ? "G_INTRINSIC_W_SIDE_EFFECTS" : "G_INTRINSIC"; + } + + RecordKeeper &RK = I->TheDef->getRecords(); + return CGT.getInstruction(RK.getDef(Opc)); +} + +static const CodeGenIntrinsic *getCodeGenIntrinsic(Record *R) { + // Intrinsics need to have a static lifetime because the match table keeps + // references to CodeGenIntrinsic objects. 
+ static DenseMap<const Record *, std::unique_ptr<CodeGenIntrinsic>> + AllIntrinsics; + + auto &Ptr = AllIntrinsics[R]; + if (!Ptr) + Ptr = std::make_unique<CodeGenIntrinsic>(R, std::vector<Record *>()); + return Ptr.get(); +} + +std::unique_ptr<Pattern> +PatternParser::parseInstructionPattern(const Init &Arg, StringRef Name) { + const DagInit *DagPat = dyn_cast<DagInit>(&Arg); + if (!DagPat) + return nullptr; + + std::unique_ptr<InstructionPattern> Pat; + if (const DagInit *IP = getDagWithOperatorOfSubClass(Arg, "Instruction")) { + auto &Instr = CGT.getInstruction(IP->getOperatorAsDef(DiagLoc)); + Pat = + std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name)); + } else if (const DagInit *IP = + getDagWithOperatorOfSubClass(Arg, "Intrinsic")) { + Record *TheDef = IP->getOperatorAsDef(DiagLoc); + const CodeGenIntrinsic *Intrin = getCodeGenIntrinsic(TheDef); + const CodeGenInstruction &Instr = getInstrForIntrinsic(CGT, Intrin); + Pat = + std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name)); + cast<CodeGenInstructionPattern>(*Pat).setIntrinsic(Intrin); + } else if (const DagInit *PFP = + getDagWithOperatorOfSubClass(Arg, PatFrag::ClassName)) { + const Record *Def = PFP->getOperatorAsDef(DiagLoc); + const PatFrag *PF = parsePatFrag(Def); + if (!PF) + return nullptr; // Already diagnosed by parsePatFrag + Pat = std::make_unique<PatFragPattern>(*PF, insertStrRef(Name)); + } else if (const DagInit *BP = + getDagWithOperatorOfSubClass(Arg, BuiltinPattern::ClassName)) { + Pat = std::make_unique<BuiltinPattern>(*BP->getOperatorAsDef(DiagLoc), + insertStrRef(Name)); + } else + return nullptr; + + for (unsigned K = 0; K < DagPat->getNumArgs(); ++K) { + Init *Arg = DagPat->getArg(K); + if (auto *DagArg = getDagWithSpecificOperator(*Arg, "MIFlags")) { + if (!parseInstructionPatternMIFlags(*Pat, DagArg)) + return nullptr; + continue; + } + + if (!parseInstructionPatternOperand(*Pat, Arg, DagPat->getArgName(K))) + return nullptr; + } + + if 
(!Pat->checkSemantics(DiagLoc)) + return nullptr; + + return std::move(Pat); +} + +std::unique_ptr<Pattern> +PatternParser::parseWipMatchOpcodeMatcher(const Init &Arg, StringRef Name) { + const DagInit *Matcher = getDagWithSpecificOperator(Arg, "wip_match_opcode"); + if (!Matcher) + return nullptr; + + if (Matcher->getNumArgs() == 0) { + PrintError(DiagLoc, "Empty wip_match_opcode"); + return nullptr; + } + + // Each argument is an opcode that can match. + auto Result = std::make_unique<AnyOpcodePattern>(insertStrRef(Name)); + for (const auto &Arg : Matcher->getArgs()) { + Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction"); + if (OpcodeDef) { + Result->addOpcode(&CGT.getInstruction(OpcodeDef)); + continue; + } + + PrintError(DiagLoc, "Arguments to wip_match_opcode must be instructions"); + return nullptr; + } + + return std::move(Result); +} + +bool PatternParser::parseInstructionPatternOperand(InstructionPattern &IP, + const Init *OpInit, + const StringInit *OpName) { + const auto ParseErr = [&]() { + PrintError(DiagLoc, + "cannot parse operand '" + OpInit->getAsUnquotedString() + "' "); + if (OpName) + PrintNote(DiagLoc, + "operand name is '" + OpName->getAsUnquotedString() + '\''); + return false; + }; + + // untyped immediate, e.g. 0 + if (const auto *IntImm = dyn_cast<IntInit>(OpInit)) { + std::string Name = OpName ? OpName->getAsUnquotedString() : ""; + IP.addOperand(IntImm->getValue(), insertStrRef(Name), PatternType()); + return true; + } + + // typed immediate, e.g. 
(i32 0) + if (const auto *DagOp = dyn_cast<DagInit>(OpInit)) { + if (DagOp->getNumArgs() != 1) + return ParseErr(); + + const Record *TyDef = DagOp->getOperatorAsDef(DiagLoc); + auto ImmTy = PatternType::get(DiagLoc, TyDef, + "cannot parse immediate '" + + DagOp->getAsUnquotedString() + '\''); + if (!ImmTy) + return false; + + if (!IP.hasAllDefs()) { + PrintError(DiagLoc, "out operand of '" + IP.getInstName() + + "' cannot be an immediate"); + return false; + } + + const auto *Val = dyn_cast<IntInit>(DagOp->getArg(0)); + if (!Val) + return ParseErr(); + + std::string Name = OpName ? OpName->getAsUnquotedString() : ""; + IP.addOperand(Val->getValue(), insertStrRef(Name), *ImmTy); + return true; + } + + // Typed operand e.g. $x/$z in (G_FNEG $x, $z) + if (auto *DefI = dyn_cast<DefInit>(OpInit)) { + if (!OpName) { + PrintError(DiagLoc, "expected an operand name after '" + + OpInit->getAsString() + '\''); + return false; + } + const Record *Def = DefI->getDef(); + auto Ty = PatternType::get(DiagLoc, Def, "cannot parse operand type"); + if (!Ty) + return false; + IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), *Ty); + return true; + } + + // Untyped operand e.g. 
$x/$z in (G_FNEG $x, $z) + if (isa<UnsetInit>(OpInit)) { + assert(OpName && "Unset w/ no OpName?"); + IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), PatternType()); + return true; + } + + return ParseErr(); +} + +bool PatternParser::parseInstructionPatternMIFlags(InstructionPattern &IP, + const DagInit *Op) { + auto *CGIP = dyn_cast<CodeGenInstructionPattern>(&IP); + if (!CGIP) { + PrintError(DiagLoc, + "matching/writing MIFlags is only allowed on CodeGenInstruction " + "patterns"); + return false; + } + + const auto CheckFlagEnum = [&](const Record *R) { + if (!R->isSubClassOf(MIFlagsEnumClassName)) { + PrintError(DiagLoc, "'" + R->getName() + "' is not a subclass of '" + + MIFlagsEnumClassName + "'"); + return false; + } + + return true; + }; + + if (CGIP->getMIFlagsInfo()) { + PrintError(DiagLoc, "MIFlags can only be present once on an instruction"); + return false; + } + + auto &FI = CGIP->getOrCreateMIFlagsInfo(); + for (unsigned K = 0; K < Op->getNumArgs(); ++K) { + const Init *Arg = Op->getArg(K); + + // Match/set a flag: (MIFlags FmNoNans) + if (const auto *Def = dyn_cast<DefInit>(Arg)) { + const Record *R = Def->getDef(); + if (!CheckFlagEnum(R)) + return false; + + FI.addSetFlag(R); + continue; + } + + // Do not match a flag/unset a flag: (MIFlags (not FmNoNans)) + if (const DagInit *NotDag = getDagWithSpecificOperator(*Arg, "not")) { + for (const Init *NotArg : NotDag->getArgs()) { + const DefInit *DefArg = dyn_cast<DefInit>(NotArg); + if (!DefArg) { + PrintError(DiagLoc, "cannot parse '" + NotArg->getAsUnquotedString() + + "': expected a '" + MIFlagsEnumClassName + + "'"); + return false; + } + + const Record *R = DefArg->getDef(); + if (!CheckFlagEnum(R)) + return false; + + FI.addUnsetFlag(R); + continue; + } + + continue; + } + + // Copy flags from a matched instruction: (MIFlags $mi) + if (isa<UnsetInit>(Arg)) { + FI.addCopyFlag(insertStrRef(Op->getArgName(K)->getAsUnquotedString())); + continue; + } + } + + return true; +} + 
+std::unique_ptr<PatFrag> PatternParser::parsePatFragImpl(const Record *Def) { + auto StackTrace = PrettyStackTraceParse(*Def); + if (!Def->isSubClassOf(PatFrag::ClassName)) + return nullptr; + + const DagInit *Ins = Def->getValueAsDag("InOperands"); + if (Ins->getOperatorAsDef(Def->getLoc())->getName() != "ins") { + PrintError(Def, "expected 'ins' operator for " + PatFrag::ClassName + + " in operands list"); + return nullptr; + } + + const DagInit *Outs = Def->getValueAsDag("OutOperands"); + if (Outs->getOperatorAsDef(Def->getLoc())->getName() != "outs") { + PrintError(Def, "expected 'outs' operator for " + PatFrag::ClassName + + " out operands list"); + return nullptr; + } + + auto Result = std::make_unique<PatFrag>(*Def); + if (!parsePatFragParamList(*Outs, [&](StringRef Name, unsigned Kind) { + Result->addOutParam(insertStrRef(Name), (PatFrag::ParamKind)Kind); + return true; + })) + return nullptr; + + if (!parsePatFragParamList(*Ins, [&](StringRef Name, unsigned Kind) { + Result->addInParam(insertStrRef(Name), (PatFrag::ParamKind)Kind); + return true; + })) + return nullptr; + + const ListInit *Alts = Def->getValueAsListInit("Alternatives"); + unsigned AltIdx = 0; + for (const Init *Alt : *Alts) { + const auto *PatDag = dyn_cast<DagInit>(Alt); + if (!PatDag) { + PrintError(Def, "expected dag init for PatFrag pattern alternative"); + return nullptr; + } + + PatFrag::Alternative &A = Result->addAlternative(); + const auto AddPat = [&](std::unique_ptr<Pattern> Pat) { + A.Pats.push_back(std::move(Pat)); + return true; + }; + + SaveAndRestore<ArrayRef<SMLoc>> DiagLocSAR(DiagLoc, Def->getLoc()); + if (!parsePatternList( + *PatDag, AddPat, "pattern", + /*AnonPatPrefix*/ + (Def->getName() + "_alt" + Twine(AltIdx++) + "_pattern").str())) + return nullptr; + } + + if (!Result->buildOperandsTables() || !Result->checkSemantics()) + return nullptr; + + return Result; +} + +bool PatternParser::parsePatFragParamList( + const DagInit &OpsList, + function_ref<bool(StringRef, 
unsigned)> ParseAction) { + for (unsigned K = 0; K < OpsList.getNumArgs(); ++K) { + const StringInit *Name = OpsList.getArgName(K); + const Init *Ty = OpsList.getArg(K); + + if (!Name) { + PrintError(DiagLoc, "all operands must be named'"); + return false; + } + const std::string NameStr = Name->getAsUnquotedString(); + + PatFrag::ParamKind OpKind; + if (isSpecificDef(*Ty, "gi_imm")) + OpKind = PatFrag::PK_Imm; + else if (isSpecificDef(*Ty, "root")) + OpKind = PatFrag::PK_Root; + else if (isa<UnsetInit>(Ty) || + isSpecificDef(*Ty, "gi_mo")) // no type = gi_mo. + OpKind = PatFrag::PK_MachineOperand; + else { + PrintError( + DiagLoc, + '\'' + NameStr + + "' operand type was expected to be 'root', 'gi_imm' or 'gi_mo'"); + return false; + } + + if (!ParseAction(NameStr, (unsigned)OpKind)) + return false; + } + + return true; +} + +const PatFrag *PatternParser::parsePatFrag(const Record *Def) { + // Cache already parsed PatFrags to avoid doing extra work. + static DenseMap<const Record *, std::unique_ptr<PatFrag>> ParsedPatFrags; + + auto It = ParsedPatFrags.find(Def); + if (It != ParsedPatFrags.end()) { + SeenPatFrags.insert(It->second.get()); + return It->second.get(); + } + + std::unique_ptr<PatFrag> NewPatFrag = parsePatFragImpl(Def); + if (!NewPatFrag) { + PrintError(Def, "Could not parse " + PatFrag::ClassName + " '" + + Def->getName() + "'"); + // Put a nullptr in the map so we don't attempt parsing this again. 
+ ParsedPatFrags[Def] = nullptr; + return nullptr; + } + + const auto *Res = NewPatFrag.get(); + ParsedPatFrags[Def] = std::move(NewPatFrag); + SeenPatFrags.insert(Res); + return Res; +} + +} // namespace gi +} // namespace llvm diff --git a/llvm/utils/TableGen/Common/GlobalISel/PatternParser.h b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.h new file mode 100644 index 0000000..cd6f524 --- /dev/null +++ b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.h @@ -0,0 +1,118 @@ +//===- PatternParser.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Contains tools to parse MIR patterns from TableGen DAG elements. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_GLOBALISEL_PATTERNPARSER_H +#define LLVM_UTILS_GLOBALISEL_PATTERNPARSER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/SMLoc.h" +#include <memory> + +namespace llvm { +class CodeGenTarget; +class DagInit; +class Init; +class Record; +class StringRef; +class StringInit; + +namespace gi { +class InstructionPattern; +class Pattern; +class PatFrag; + +/// Helper class to parse MIR Pattern lists. +/// +/// e.g., `(match (G_FADD $x, $y, $z), (G_FNEG $y, $z))` +class PatternParser { + const CodeGenTarget &CGT; + ArrayRef<SMLoc> DiagLoc; + + mutable SmallPtrSet<const PatFrag *, 2> SeenPatFrags; + +public: + PatternParser(const CodeGenTarget &CGT, ArrayRef<SMLoc> DiagLoc) + : CGT(CGT), DiagLoc(DiagLoc) {} + + /// Parses a list of patterns such as: + /// (Operator (Pattern1 ...), (Pattern2 ...)) + /// \param List DagInit of the expected pattern list. 
+ /// \param ParseAction Callback to handle a succesfully parsed pattern. + /// \param Operator The name of the operator, e.g. "match" + /// \param AnonPatNamePrefix Prefix for anonymous pattern names. + /// \return true on success, false on failure. + bool + parsePatternList(const DagInit &List, + function_ref<bool(std::unique_ptr<Pattern>)> ParseAction, + StringRef Operator, StringRef AnonPatNamePrefix); + + /// \returns all PatFrags encountered by this PatternParser. + const auto &getSeenPatFrags() const { return SeenPatFrags; } + +private: + /// Parse any InstructionPattern from a TableGen Init. + /// \param Arg Init to parse. + /// \param PatName Name of the pattern that will be parsed. + /// \return the parsed pattern on success, nullptr on failure. + std::unique_ptr<Pattern> parseInstructionPattern(const Init &Arg, + StringRef PatName); + + /// Parse a WipOpcodeMatcher from a TableGen Init. + /// \param Arg Init to parse. + /// \param PatName Name of the pattern that will be parsed. + /// \return the parsed pattern on success, nullptr on failure. + std::unique_ptr<Pattern> parseWipMatchOpcodeMatcher(const Init &Arg, + StringRef PatName); + + /// Parses an Operand of an InstructionPattern from a TableGen Init. + /// \param IP InstructionPattern for which we're parsing. + /// \param OpInit Init to parse. + /// \param OpName Name of the operand to parse. + /// \return true on success, false on failure. + bool parseInstructionPatternOperand(InstructionPattern &IP, + const Init *OpInit, + const StringInit *OpName); + + /// Parses a MIFlag for an InstructionPattern from a TableGen Init. + /// \param IP InstructionPattern for which we're parsing. + /// \param Op Init to parse. + /// \return true on success, false on failure. + bool parseInstructionPatternMIFlags(InstructionPattern &IP, + const DagInit *Op); + + /// (Uncached) PatFrag parsing implementation. + /// \param Def PatFrag def to parsee. + /// \return the parsed PatFrag on success, nullptr on failure. 
+ std::unique_ptr<PatFrag> parsePatFragImpl(const Record *Def); + + /// Parses the in or out parameter list of a PatFrag. + /// \param OpsList Init to parse. + /// \param ParseAction Callback on successful parse, with the name of + /// the parameter and its \ref PatFrag::ParamKind + /// \return true on success, false on failure. + bool + parsePatFragParamList(const DagInit &OpsList, + function_ref<bool(StringRef, unsigned)> ParseAction); + + /// Cached PatFrag parser. This avoids duplicate work by keeping track of + /// already-parsed PatFrags. + /// \param Def PatFrag def to parsee. + /// \return the parsed PatFrag on success, nullptr on failure. + const PatFrag *parsePatFrag(const Record *Def); +}; + +} // namespace gi +} // namespace llvm + +#endif diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index 39b9f8a..1ae6efd 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -36,6 +36,7 @@ #include "Common/GlobalISel/GlobalISelMatchTable.h" #include "Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h" #include "Common/GlobalISel/MatchDataInfo.h" +#include "Common/GlobalISel/PatternParser.h" #include "Common/GlobalISel/Patterns.h" #include "Common/SubtargetFeatureInfo.h" #include "llvm/ADT/APInt.h" @@ -80,7 +81,6 @@ cl::opt<bool> DebugTypeInfer("gicombiner-debug-typeinfer", constexpr StringLiteral CXXApplyPrefix = "GICXXCustomAction_CombineApply"; constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_"; -constexpr StringLiteral MIFlagsEnumClassName = "MIFlagEnum"; //===- CodeExpansions Helpers --------------------------------------------===// @@ -109,17 +109,6 @@ void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID, //===- Misc. Helpers -----------------------------------------------------===// -/// Copies a StringRef into a static pool to preserve it. 
-/// Most Pattern classes use StringRef so we need this. -StringRef insertStrRef(StringRef S) { - if (S.empty()) - return {}; - - static StringSet<> Pool; - auto [It, Inserted] = Pool.insert(S); - return It->getKey(); -} - template <typename Container> auto keys(Container &&C) { return map_range(C, [](auto &Entry) -> auto & { return Entry.first; }); } @@ -639,8 +628,9 @@ public: SubtargetFeatureInfoMap &SubtargetFeatures, Record &RuleDef, unsigned ID, std::vector<RuleMatcher> &OutRMs) - : CGT(CGT), SubtargetFeatures(SubtargetFeatures), RuleDef(RuleDef), - RuleID(ID), OutRMs(OutRMs) {} + : Parser(CGT, RuleDef.getLoc()), CGT(CGT), + SubtargetFeatures(SubtargetFeatures), RuleDef(RuleDef), RuleID(ID), + OutRMs(OutRMs) {} /// Parses all fields in the RuleDef record. bool parseAll(); @@ -718,26 +708,6 @@ private: bool buildRuleOperandsTable(); bool parseDefs(const DagInit &Def); - bool - parsePatternList(const DagInit &List, - function_ref<bool(std::unique_ptr<Pattern>)> ParseAction, - StringRef Operator, ArrayRef<SMLoc> DiagLoc, - StringRef AnonPatNamePrefix) const; - - std::unique_ptr<Pattern> parseInstructionPattern(const Init &Arg, - StringRef PatName) const; - std::unique_ptr<Pattern> parseWipMatchOpcodeMatcher(const Init &Arg, - StringRef PatName) const; - bool parseInstructionPatternOperand(InstructionPattern &IP, - const Init *OpInit, - const StringInit *OpName) const; - bool parseInstructionPatternMIFlags(InstructionPattern &IP, - const DagInit *Op) const; - std::unique_ptr<PatFrag> parsePatFragImpl(const Record *Def) const; - bool parsePatFragParamList( - ArrayRef<SMLoc> DiagLoc, const DagInit &OpsList, - function_ref<bool(StringRef, PatFrag::ParamKind)> ParseAction) const; - const PatFrag *parsePatFrag(const Record *Def) const; bool emitMatchPattern(CodeExpansions &CE, const PatternAlternatives &Alts, const InstructionPattern &IP); @@ -781,6 +751,7 @@ private: DenseSet<const Pattern *> &SeenPats, OperandDefLookupFn LookupOperandDef, OperandMapperFnRef 
OperandMapper = [](const auto &O) { return O; }); + PatternParser Parser; const CodeGenTarget &CGT; SubtargetFeatureInfoMap &SubtargetFeatures; Record &RuleDef; @@ -808,9 +779,6 @@ private: SmallVector<MatchDataInfo, 2> MatchDatas; SmallVector<PatternAlternatives, 1> PermutationsToEmit; - - // print()/debug-only members. - mutable SmallPtrSet<const PatFrag *, 2> SeenPatFrags; }; bool CombineRuleBuilder::parseAll() { @@ -819,16 +787,16 @@ bool CombineRuleBuilder::parseAll() { if (!parseDefs(*RuleDef.getValueAsDag("Defs"))) return false; - if (!parsePatternList( + if (!Parser.parsePatternList( *RuleDef.getValueAsDag("Match"), [this](auto Pat) { return addMatchPattern(std::move(Pat)); }, "match", - RuleDef.getLoc(), (RuleDef.getName() + "_match").str())) + (RuleDef.getName() + "_match").str())) return false; - if (!parsePatternList( + if (!Parser.parsePatternList( *RuleDef.getValueAsDag("Apply"), [this](auto Pat) { return addApplyPattern(std::move(Pat)); }, "apply", - RuleDef.getLoc(), (RuleDef.getName() + "_apply").str())) + (RuleDef.getName() + "_apply").str())) return false; if (!buildRuleOperandsTable() || !typecheckPatterns() || !findRoots() || @@ -884,9 +852,10 @@ void CombineRuleBuilder::print(raw_ostream &OS) const { OS << " )\n"; } - if (!SeenPatFrags.empty()) { + const auto &SeenPFs = Parser.getSeenPatFrags(); + if (!SeenPFs.empty()) { OS << " (PatFrags\n"; - for (const auto *PF : SeenPatFrags) { + for (const auto *PF : Parser.getSeenPatFrags()) { PF->print(OS, /*Indent=*/" "); OS << '\n'; } @@ -1500,426 +1469,6 @@ bool CombineRuleBuilder::parseDefs(const DagInit &Def) { return true; } -bool CombineRuleBuilder::parsePatternList( - const DagInit &List, - function_ref<bool(std::unique_ptr<Pattern>)> ParseAction, - StringRef Operator, ArrayRef<SMLoc> DiagLoc, - StringRef AnonPatNamePrefix) const { - if (List.getOperatorAsDef(RuleDef.getLoc())->getName() != Operator) { - ::PrintError(DiagLoc, "Expected " + Operator + " operator"); - return false; - } - - if 
(List.getNumArgs() == 0) { - ::PrintError(DiagLoc, Operator + " pattern list is empty"); - return false; - } - - // The match section consists of a list of matchers and predicates. Parse each - // one and add the equivalent GIMatchDag nodes, predicates, and edges. - for (unsigned I = 0; I < List.getNumArgs(); ++I) { - Init *Arg = List.getArg(I); - std::string Name = List.getArgName(I) - ? List.getArgName(I)->getValue().str() - : ("__" + AnonPatNamePrefix + "_" + Twine(I)).str(); - - if (auto Pat = parseInstructionPattern(*Arg, Name)) { - if (!ParseAction(std::move(Pat))) - return false; - continue; - } - - if (auto Pat = parseWipMatchOpcodeMatcher(*Arg, Name)) { - if (!ParseAction(std::move(Pat))) - return false; - continue; - } - - // Parse arbitrary C++ code - if (const auto *StringI = dyn_cast<StringInit>(Arg)) { - auto CXXPat = std::make_unique<CXXPattern>(*StringI, insertStrRef(Name)); - if (!ParseAction(std::move(CXXPat))) - return false; - continue; - } - - ::PrintError(DiagLoc, - "Failed to parse pattern: '" + Arg->getAsString() + "'"); - return false; - } - - return true; -} - -static const CodeGenInstruction & -getInstrForIntrinsic(const CodeGenTarget &CGT, const CodeGenIntrinsic *I) { - StringRef Opc; - if (I->isConvergent) { - Opc = I->hasSideEffects ? "G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS" - : "G_INTRINSIC_CONVERGENT"; - } else { - Opc = I->hasSideEffects ? "G_INTRINSIC_W_SIDE_EFFECTS" : "G_INTRINSIC"; - } - - RecordKeeper &RK = I->TheDef->getRecords(); - return CGT.getInstruction(RK.getDef(Opc)); -} - -static const CodeGenIntrinsic *getCodeGenIntrinsic(Record *R) { - // Intrinsics need to have a static lifetime because the match table keeps - // references to CodeGenIntrinsic objects. 
- static DenseMap<const Record *, std::unique_ptr<CodeGenIntrinsic>> - AllIntrinsics; - - auto &Ptr = AllIntrinsics[R]; - if (!Ptr) - Ptr = std::make_unique<CodeGenIntrinsic>(R, std::vector<Record *>()); - return Ptr.get(); -} - -std::unique_ptr<Pattern> -CombineRuleBuilder::parseInstructionPattern(const Init &Arg, - StringRef Name) const { - const DagInit *DagPat = dyn_cast<DagInit>(&Arg); - if (!DagPat) - return nullptr; - - std::unique_ptr<InstructionPattern> Pat; - if (const DagInit *IP = getDagWithOperatorOfSubClass(Arg, "Instruction")) { - auto &Instr = CGT.getInstruction(IP->getOperatorAsDef(RuleDef.getLoc())); - Pat = - std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name)); - } else if (const DagInit *IP = - getDagWithOperatorOfSubClass(Arg, "Intrinsic")) { - Record *TheDef = IP->getOperatorAsDef(RuleDef.getLoc()); - const CodeGenIntrinsic *Intrin = getCodeGenIntrinsic(TheDef); - const CodeGenInstruction &Instr = getInstrForIntrinsic(CGT, Intrin); - Pat = - std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name)); - cast<CodeGenInstructionPattern>(*Pat).setIntrinsic(Intrin); - } else if (const DagInit *PFP = - getDagWithOperatorOfSubClass(Arg, PatFrag::ClassName)) { - const Record *Def = PFP->getOperatorAsDef(RuleDef.getLoc()); - const PatFrag *PF = parsePatFrag(Def); - if (!PF) - return nullptr; // Already diagnosed by parsePatFrag - Pat = std::make_unique<PatFragPattern>(*PF, insertStrRef(Name)); - } else if (const DagInit *BP = - getDagWithOperatorOfSubClass(Arg, BuiltinPattern::ClassName)) { - Pat = std::make_unique<BuiltinPattern>( - *BP->getOperatorAsDef(RuleDef.getLoc()), insertStrRef(Name)); - } else - return nullptr; - - for (unsigned K = 0; K < DagPat->getNumArgs(); ++K) { - Init *Arg = DagPat->getArg(K); - if (auto *DagArg = getDagWithSpecificOperator(*Arg, "MIFlags")) { - if (!parseInstructionPatternMIFlags(*Pat, DagArg)) - return nullptr; - continue; - } - - if (!parseInstructionPatternOperand(*Pat, Arg, 
DagPat->getArgName(K))) - return nullptr; - } - - if (!Pat->checkSemantics(RuleDef.getLoc())) - return nullptr; - - return std::move(Pat); -} - -std::unique_ptr<Pattern> -CombineRuleBuilder::parseWipMatchOpcodeMatcher(const Init &Arg, - StringRef Name) const { - const DagInit *Matcher = getDagWithSpecificOperator(Arg, "wip_match_opcode"); - if (!Matcher) - return nullptr; - - if (Matcher->getNumArgs() == 0) { - PrintError("Empty wip_match_opcode"); - return nullptr; - } - - // Each argument is an opcode that can match. - auto Result = std::make_unique<AnyOpcodePattern>(insertStrRef(Name)); - for (const auto &Arg : Matcher->getArgs()) { - Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction"); - if (OpcodeDef) { - Result->addOpcode(&CGT.getInstruction(OpcodeDef)); - continue; - } - - PrintError("Arguments to wip_match_opcode must be instructions"); - return nullptr; - } - - return std::move(Result); -} - -bool CombineRuleBuilder::parseInstructionPatternOperand( - InstructionPattern &IP, const Init *OpInit, - const StringInit *OpName) const { - const auto ParseErr = [&]() { - PrintError("cannot parse operand '" + OpInit->getAsUnquotedString() + "' "); - if (OpName) - PrintNote("operand name is '" + OpName->getAsUnquotedString() + "'"); - return false; - }; - - // untyped immediate, e.g. 0 - if (const auto *IntImm = dyn_cast<IntInit>(OpInit)) { - std::string Name = OpName ? OpName->getAsUnquotedString() : ""; - IP.addOperand(IntImm->getValue(), insertStrRef(Name), PatternType()); - return true; - } - - // typed immediate, e.g. 
(i32 0) - if (const auto *DagOp = dyn_cast<DagInit>(OpInit)) { - if (DagOp->getNumArgs() != 1) - return ParseErr(); - - const Record *TyDef = DagOp->getOperatorAsDef(RuleDef.getLoc()); - auto ImmTy = PatternType::get(RuleDef.getLoc(), TyDef, - "cannot parse immediate '" + - DagOp->getAsUnquotedString() + "'"); - if (!ImmTy) - return false; - - if (!IP.hasAllDefs()) { - PrintError("out operand of '" + IP.getInstName() + - "' cannot be an immediate"); - return false; - } - - const auto *Val = dyn_cast<IntInit>(DagOp->getArg(0)); - if (!Val) - return ParseErr(); - - std::string Name = OpName ? OpName->getAsUnquotedString() : ""; - IP.addOperand(Val->getValue(), insertStrRef(Name), *ImmTy); - return true; - } - - // Typed operand e.g. $x/$z in (G_FNEG $x, $z) - if (auto *DefI = dyn_cast<DefInit>(OpInit)) { - if (!OpName) { - PrintError("expected an operand name after '" + OpInit->getAsString() + - "'"); - return false; - } - const Record *Def = DefI->getDef(); - auto Ty = - PatternType::get(RuleDef.getLoc(), Def, "cannot parse operand type"); - if (!Ty) - return false; - IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), *Ty); - return true; - } - - // Untyped operand e.g. 
$x/$z in (G_FNEG $x, $z) - if (isa<UnsetInit>(OpInit)) { - assert(OpName && "Unset w/ no OpName?"); - IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), PatternType()); - return true; - } - - return ParseErr(); -} - -bool CombineRuleBuilder::parseInstructionPatternMIFlags( - InstructionPattern &IP, const DagInit *Op) const { - auto *CGIP = dyn_cast<CodeGenInstructionPattern>(&IP); - if (!CGIP) { - PrintError("matching/writing MIFlags is only allowed on CodeGenInstruction " - "patterns"); - return false; - } - - const auto CheckFlagEnum = [&](const Record *R) { - if (!R->isSubClassOf(MIFlagsEnumClassName)) { - PrintError("'" + R->getName() + "' is not a subclass of '" + - MIFlagsEnumClassName + "'"); - return false; - } - - return true; - }; - - if (CGIP->getMIFlagsInfo()) { - PrintError("MIFlags can only be present once on an instruction"); - return false; - } - - auto &FI = CGIP->getOrCreateMIFlagsInfo(); - for (unsigned K = 0; K < Op->getNumArgs(); ++K) { - const Init *Arg = Op->getArg(K); - - // Match/set a flag: (MIFlags FmNoNans) - if (const auto *Def = dyn_cast<DefInit>(Arg)) { - const Record *R = Def->getDef(); - if (!CheckFlagEnum(R)) - return false; - - FI.addSetFlag(R); - continue; - } - - // Do not match a flag/unset a flag: (MIFlags (not FmNoNans)) - if (const DagInit *NotDag = getDagWithSpecificOperator(*Arg, "not")) { - for (const Init *NotArg : NotDag->getArgs()) { - const DefInit *DefArg = dyn_cast<DefInit>(NotArg); - if (!DefArg) { - PrintError("cannot parse '" + NotArg->getAsUnquotedString() + - "': expected a '" + MIFlagsEnumClassName + "'"); - return false; - } - - const Record *R = DefArg->getDef(); - if (!CheckFlagEnum(R)) - return false; - - FI.addUnsetFlag(R); - continue; - } - - continue; - } - - // Copy flags from a matched instruction: (MIFlags $mi) - if (isa<UnsetInit>(Arg)) { - FI.addCopyFlag(insertStrRef(Op->getArgName(K)->getAsUnquotedString())); - continue; - } - } - - return true; -} - -std::unique_ptr<PatFrag> 
-CombineRuleBuilder::parsePatFragImpl(const Record *Def) const { - auto StackTrace = PrettyStackTraceParse(*Def); - if (!Def->isSubClassOf(PatFrag::ClassName)) - return nullptr; - - const DagInit *Ins = Def->getValueAsDag("InOperands"); - if (Ins->getOperatorAsDef(Def->getLoc())->getName() != "ins") { - ::PrintError(Def, "expected 'ins' operator for " + PatFrag::ClassName + - " in operands list"); - return nullptr; - } - - const DagInit *Outs = Def->getValueAsDag("OutOperands"); - if (Outs->getOperatorAsDef(Def->getLoc())->getName() != "outs") { - ::PrintError(Def, "expected 'outs' operator for " + PatFrag::ClassName + - " out operands list"); - return nullptr; - } - - auto Result = std::make_unique<PatFrag>(*Def); - if (!parsePatFragParamList(Def->getLoc(), *Outs, - [&](StringRef Name, PatFrag::ParamKind Kind) { - Result->addOutParam(insertStrRef(Name), Kind); - return true; - })) - return nullptr; - - if (!parsePatFragParamList(Def->getLoc(), *Ins, - [&](StringRef Name, PatFrag::ParamKind Kind) { - Result->addInParam(insertStrRef(Name), Kind); - return true; - })) - return nullptr; - - const ListInit *Alts = Def->getValueAsListInit("Alternatives"); - unsigned AltIdx = 0; - for (const Init *Alt : *Alts) { - const auto *PatDag = dyn_cast<DagInit>(Alt); - if (!PatDag) { - ::PrintError(Def, "expected dag init for PatFrag pattern alternative"); - return nullptr; - } - - PatFrag::Alternative &A = Result->addAlternative(); - const auto AddPat = [&](std::unique_ptr<Pattern> Pat) { - A.Pats.push_back(std::move(Pat)); - return true; - }; - - if (!parsePatternList( - *PatDag, AddPat, "pattern", Def->getLoc(), - /*AnonPatPrefix*/ - (Def->getName() + "_alt" + Twine(AltIdx++) + "_pattern").str())) - return nullptr; - } - - if (!Result->buildOperandsTables() || !Result->checkSemantics()) - return nullptr; - - return Result; -} - -bool CombineRuleBuilder::parsePatFragParamList( - ArrayRef<SMLoc> DiagLoc, const DagInit &OpsList, - function_ref<bool(StringRef, PatFrag::ParamKind)> 
ParseAction) const { - for (unsigned K = 0; K < OpsList.getNumArgs(); ++K) { - const StringInit *Name = OpsList.getArgName(K); - const Init *Ty = OpsList.getArg(K); - - if (!Name) { - ::PrintError(DiagLoc, "all operands must be named'"); - return false; - } - const std::string NameStr = Name->getAsUnquotedString(); - - PatFrag::ParamKind OpKind; - if (isSpecificDef(*Ty, "gi_imm")) - OpKind = PatFrag::PK_Imm; - else if (isSpecificDef(*Ty, "root")) - OpKind = PatFrag::PK_Root; - else if (isa<UnsetInit>(Ty) || - isSpecificDef(*Ty, "gi_mo")) // no type = gi_mo. - OpKind = PatFrag::PK_MachineOperand; - else { - ::PrintError( - DiagLoc, - "'" + NameStr + - "' operand type was expected to be 'root', 'gi_imm' or 'gi_mo'"); - return false; - } - - if (!ParseAction(NameStr, OpKind)) - return false; - } - - return true; -} - -const PatFrag *CombineRuleBuilder::parsePatFrag(const Record *Def) const { - // Cache already parsed PatFrags to avoid doing extra work. - static DenseMap<const Record *, std::unique_ptr<PatFrag>> ParsedPatFrags; - - auto It = ParsedPatFrags.find(Def); - if (It != ParsedPatFrags.end()) { - SeenPatFrags.insert(It->second.get()); - return It->second.get(); - } - - std::unique_ptr<PatFrag> NewPatFrag = parsePatFragImpl(Def); - if (!NewPatFrag) { - ::PrintError(Def, "Could not parse " + PatFrag::ClassName + " '" + - Def->getName() + "'"); - // Put a nullptr in the map so we don't attempt parsing this again. 
- ParsedPatFrags[Def] = nullptr; - return nullptr; - } - - const auto *Res = NewPatFrag.get(); - ParsedPatFrags[Def] = std::move(NewPatFrag); - SeenPatFrags.insert(Res); - return Res; -} - bool CombineRuleBuilder::emitMatchPattern(CodeExpansions &CE, const PatternAlternatives &Alts, const InstructionPattern &IP) { @@ -2956,8 +2505,8 @@ GICombinerEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules) { const Matcher *B) { auto *L = static_cast<const RuleMatcher *>(A); auto *R = static_cast<const RuleMatcher *>(B); - return std::tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < - std::tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); + return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < + std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); }); for (Matcher *Rule : InputRules) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn index 12d875c..5ba91fc 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn @@ -104,6 +104,7 @@ static_library("MCTargetDesc") { "AMDGPUMCAsmInfo.cpp", "AMDGPUMCCodeEmitter.cpp", "AMDGPUMCExpr.cpp", + "AMDGPUMCKernelDescriptor.cpp", "AMDGPUMCTargetDesc.cpp", "AMDGPUTargetStreamer.cpp", "R600InstPrinter.cpp", diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn index c0ea627..daa3278 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn @@ -18,9 +18,11 @@ static_library("Common") { "DAGISelMatcher.cpp", "GlobalISel/CXXPredicates.cpp", "GlobalISel/CodeExpander.cpp", + "GlobalISel/CombinerUtils.cpp", "GlobalISel/GlobalISelMatchTable.cpp", "GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp", "GlobalISel/MatchDataInfo.cpp", + 
"GlobalISel/PatternParser.cpp", "GlobalISel/Patterns.cpp", "InfoByHwMode.cpp", "OptEmitter.cpp", diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index f4bac93..28526f1 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -611,19 +611,19 @@ def LLVM_DbgLabelOp : LLVM_IntrOp<"dbg.label", [], [], [], 0> { // Variadic function intrinsics. // -def LLVM_VaStartOp : LLVM_ZeroResultIntrOp<"vastart">, +def LLVM_VaStartOp : LLVM_ZeroResultIntrOp<"vastart", [0]>, Arguments<(ins LLVM_AnyPointer:$arg_list)> { let assemblyFormat = "$arg_list attr-dict `:` qualified(type($arg_list))"; let summary = "Initializes `arg_list` for subsequent variadic argument extractions."; } -def LLVM_VaCopyOp : LLVM_ZeroResultIntrOp<"vacopy">, +def LLVM_VaCopyOp : LLVM_ZeroResultIntrOp<"vacopy", [0]>, Arguments<(ins LLVM_AnyPointer:$dest_list, LLVM_AnyPointer:$src_list)> { let assemblyFormat = "$src_list `to` $dest_list attr-dict `:` type(operands)"; let summary = "Copies the current argument position from `src_list` to `dest_list`."; } -def LLVM_VaEndOp : LLVM_ZeroResultIntrOp<"vaend">, +def LLVM_VaEndOp : LLVM_ZeroResultIntrOp<"vaend", [0]>, Arguments<(ins LLVM_AnyPointer:$arg_list)> { let assemblyFormat = "$arg_list attr-dict `:` qualified(type($arg_list))"; let summary = "Destroys `arg_list`, which has been initialized by `intr.vastart` or `intr.vacopy`."; diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index 6c8a170..f7c1f4d 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -210,7 +210,7 @@ public: /// registration. The promised interface type can be an interface of any type /// not just a dialect interface, i.e. it may also be an /// AttributeInterface/OpInterface/TypeInterface/etc. 
- template <typename ConcreteT, typename InterfaceT> + template <typename InterfaceT, typename ConcreteT> void declarePromisedInterface() { unresolvedPromisedInterfaces.insert( {TypeID::get<ConcreteT>(), InterfaceT::getInterfaceID()}); @@ -221,7 +221,7 @@ public: // declarePromisedInterfaces<FunctionOpInterface, MyFuncType1, MyFuncType2>() template <typename InterfaceT, typename... ConcreteT> void declarePromisedInterfaces() { - (declarePromisedInterface<ConcreteT, InterfaceT>(), ...); + (declarePromisedInterface<InterfaceT, ConcreteT>(), ...); } /// Checks if the given interface, which is attempting to be used, is a diff --git a/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp b/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp index 6a59318..042acf6 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp @@ -48,9 +48,9 @@ void arith::ArithDialect::initialize() { #include "mlir/Dialect/Arith/IR/ArithOpsAttributes.cpp.inc" >(); addInterfaces<ArithInlinerInterface>(); - declarePromisedInterface<ArithDialect, ConvertToLLVMPatternInterface>(); - declarePromisedInterface<SelectOp, - bufferization::BufferDeallocationOpInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, ArithDialect>(); + declarePromisedInterface<bufferization::BufferDeallocationOpInterface, + SelectOp>(); declarePromisedInterfaces<bufferization::BufferizableOpInterface, ConstantOp, IndexCastOp, SelectOp>(); declarePromisedInterfaces<ValueBoundsOpInterface, AddIOp, ConstantOp, SubIOp, diff --git a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp index ca57171..0bdcf43 100644 --- a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp +++ b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp @@ -40,7 +40,7 @@ void complex::ComplexDialect::initialize() { #define GET_ATTRDEF_LIST #include "mlir/Dialect/Complex/IR/ComplexAttributes.cpp.inc" >(); - declarePromisedInterface<ComplexDialect, 
ConvertToLLVMPatternInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, ComplexDialect>(); addInterfaces<ComplexInlinerInterface>(); } diff --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp index c6b02b9..5d11f8f6 100644 --- a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp +++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp @@ -70,11 +70,11 @@ void ControlFlowDialect::initialize() { #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.cpp.inc" >(); addInterfaces<ControlFlowInlinerInterface>(); - declarePromisedInterface<ControlFlowDialect, ConvertToLLVMPatternInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, ControlFlowDialect>(); declarePromisedInterfaces<bufferization::BufferizableOpInterface, BranchOp, CondBranchOp>(); - declarePromisedInterface<CondBranchOp, - bufferization::BufferDeallocationOpInterface>(); + declarePromisedInterface<bufferization::BufferDeallocationOpInterface, + CondBranchOp>(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Func/IR/FuncOps.cpp b/mlir/lib/Dialect/Func/IR/FuncOps.cpp index ed2ecfe9..95589e8 100644 --- a/mlir/lib/Dialect/Func/IR/FuncOps.cpp +++ b/mlir/lib/Dialect/Func/IR/FuncOps.cpp @@ -42,8 +42,8 @@ void FuncDialect::initialize() { #define GET_OP_LIST #include "mlir/Dialect/Func/IR/FuncOps.cpp.inc" >(); - declarePromisedInterface<FuncDialect, DialectInlinerInterface>(); - declarePromisedInterface<FuncDialect, ConvertToLLVMPatternInterface>(); + declarePromisedInterface<DialectInlinerInterface, FuncDialect>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, FuncDialect>(); declarePromisedInterfaces<bufferization::BufferizableOpInterface, CallOp, FuncOp, ReturnOp>(); } diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index a02eca8..f1b9ca5 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ 
b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -216,8 +216,8 @@ void GPUDialect::initialize() { #include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc" >(); addInterfaces<GPUInlinerInterface>(); - declarePromisedInterface<TerminatorOp, - bufferization::BufferDeallocationOpInterface>(); + declarePromisedInterface<bufferization::BufferDeallocationOpInterface, + TerminatorOp>(); } static std::string getSparseHandleKeyword(SparseHandleKind kind) { diff --git a/mlir/lib/Dialect/Index/IR/IndexDialect.cpp b/mlir/lib/Dialect/Index/IR/IndexDialect.cpp index d631afa..183d0e3 100644 --- a/mlir/lib/Dialect/Index/IR/IndexDialect.cpp +++ b/mlir/lib/Dialect/Index/IR/IndexDialect.cpp @@ -19,7 +19,7 @@ using namespace mlir::index; void IndexDialect::initialize() { registerAttributes(); registerOperations(); - declarePromisedInterface<IndexDialect, ConvertToLLVMPatternInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, IndexDialect>(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 9e84074..94197e4 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1044,8 +1044,8 @@ void NVVMDialect::initialize() { // Support unknown operations because not all NVVM operations are // registered. 
allowUnknownOperations(); - declarePromisedInterface<NVVMDialect, ConvertToLLVMPatternInterface>(); - declarePromisedInterface<NVVMTargetAttr, gpu::TargetAttrInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, NVVMDialect>(); + declarePromisedInterface<gpu::TargetAttrInterface, NVVMTargetAttr>(); } LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op, diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp index 0f2e75c..65b770a 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -247,7 +247,7 @@ void ROCDLDialect::initialize() { // Support unknown operations because not all ROCDL operations are registered. allowUnknownOperations(); - declarePromisedInterface<ROCDLTargetAttr, gpu::TargetAttrInterface>(); + declarePromisedInterface<gpu::TargetAttrInterface, ROCDLTargetAttr>(); } LogicalResult ROCDLDialect::verifyOperationAttribute(Operation *op, diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp index a6936fd..9e50c35 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp @@ -123,16 +123,16 @@ void mlir::linalg::LinalgDialect::initialize() { addInterfaces<LinalgInlinerInterface>(); - declarePromisedInterface<GenericOp, mesh::ShardingInterface>(); + declarePromisedInterface<mesh::ShardingInterface, GenericOp>(); declarePromisedInterfaces<mesh::ShardingInterface, #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >(); - declarePromisedInterface<CopyOp, SubsetOpInterface>(); - declarePromisedInterface<CopyOp, SubsetInsertionOpInterface>(); - declarePromisedInterface<IndexOp, ValueBoundsOpInterface>(); - declarePromisedInterface<linalg::GenericOp, TilingInterface>(); - declarePromisedInterface<linalg::GenericOp, PartialReductionOpInterface>(); + declarePromisedInterface<SubsetOpInterface, 
CopyOp>(); + declarePromisedInterface<SubsetInsertionOpInterface, CopyOp>(); + declarePromisedInterface<ValueBoundsOpInterface, IndexOp>(); + declarePromisedInterface<TilingInterface, linalg::GenericOp>(); + declarePromisedInterface<PartialReductionOpInterface, linalg::GenericOp>(); declarePromisedInterfaces<TilingInterface, #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" diff --git a/mlir/lib/Dialect/Math/IR/MathDialect.cpp b/mlir/lib/Dialect/Math/IR/MathDialect.cpp index a71b24c..285b5ca 100644 --- a/mlir/lib/Dialect/Math/IR/MathDialect.cpp +++ b/mlir/lib/Dialect/Math/IR/MathDialect.cpp @@ -35,5 +35,5 @@ void mlir::math::MathDialect::initialize() { #include "mlir/Dialect/Math/IR/MathOps.cpp.inc" >(); addInterfaces<MathInlinerInterface>(); - declarePromisedInterface<MathDialect, ConvertToLLVMPatternInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, MathDialect>(); } diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp index 41082a8..3a8bd12 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp @@ -47,14 +47,14 @@ void mlir::memref::MemRefDialect::initialize() { #include "mlir/Dialect/MemRef/IR/MemRefOps.cpp.inc" >(); addInterfaces<MemRefInlinerInterface>(); - declarePromisedInterface<MemRefDialect, ConvertToLLVMPatternInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, MemRefDialect>(); declarePromisedInterfaces<bufferization::AllocationOpInterface, AllocOp, AllocaOp, ReallocOp>(); declarePromisedInterfaces<RuntimeVerifiableOpInterface, CastOp, ExpandShapeOp, LoadOp, ReinterpretCastOp, StoreOp, SubViewOp>(); declarePromisedInterfaces<ValueBoundsOpInterface, AllocOp, AllocaOp, CastOp, DimOp, GetGlobalOp, RankOp, SubViewOp>(); - declarePromisedInterface<MemRefType, DestructurableTypeInterface>(); + declarePromisedInterface<DestructurableTypeInterface, MemRefType>(); } /// Finds the 
unique dealloc operation (if one exists) for `allocValue`. diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index ddb9676..5bca8e8 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -79,7 +79,7 @@ void SCFDialect::initialize() { declarePromisedInterfaces<bufferization::BufferizableOpInterface, ConditionOp, ExecuteRegionOp, ForOp, IfOp, IndexSwitchOp, ForallOp, InParallelOp, WhileOp, YieldOp>(); - declarePromisedInterface<ForOp, ValueBoundsOpInterface>(); + declarePromisedInterface<ValueBoundsOpInterface, ForOp>(); } /// Default callback for IfOp builders. Inserts a yield without arguments. diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp index e914f46..72488d6 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp @@ -135,7 +135,7 @@ void SPIRVDialect::initialize() { // Allow unknown operations because SPIR-V is extensible. 
allowUnknownOperations(); - declarePromisedInterface<TargetEnvAttr, gpu::TargetAttrInterface>(); + declarePromisedInterface<gpu::TargetAttrInterface, TargetEnvAttr>(); } std::string SPIRVDialect::getAttributeName(Decoration decoration) { diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp index 4b31567..0020777 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp @@ -62,7 +62,7 @@ void TensorDialect::initialize() { ParallelInsertSliceOp>(); declarePromisedInterfaces<SubsetInsertionOpInterface, InsertSliceOp, ParallelInsertSliceOp>(); - declarePromisedInterface<ExtractSliceOp, SubsetExtractionOpInterface>(); + declarePromisedInterface<SubsetExtractionOpInterface, ExtractSliceOp>(); declarePromisedInterfaces<TilingInterface, PadOp, PackOp, UnPackOp>(); declarePromisedInterfaces<ValueBoundsOpInterface, CastOp, DimOp, EmptyOp, ExtractSliceOp, PadOp, RankOp>(); diff --git a/mlir/lib/Dialect/UB/IR/UBOps.cpp b/mlir/lib/Dialect/UB/IR/UBOps.cpp index 3a2010c..5b2cfe7 100644 --- a/mlir/lib/Dialect/UB/IR/UBOps.cpp +++ b/mlir/lib/Dialect/UB/IR/UBOps.cpp @@ -46,7 +46,7 @@ void UBDialect::initialize() { #include "mlir/Dialect/UB/IR/UBOpsAttributes.cpp.inc" >(); addInterfaces<UBInlinerInterface>(); - declarePromisedInterface<UBDialect, ConvertToLLVMPatternInterface>(); + declarePromisedInterface<ConvertToLLVMPatternInterface, UBDialect>(); } Operation *UBDialect::materializeConstant(OpBuilder &builder, Attribute value, diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 3529682..e566bfa 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -382,8 +382,8 @@ void VectorDialect::initialize() { YieldOp>(); declarePromisedInterfaces<SubsetOpInterface, TransferReadOp, TransferWriteOp>(); - declarePromisedInterface<TransferReadOp, SubsetExtractionOpInterface>(); - 
declarePromisedInterface<TransferWriteOp, SubsetInsertionOpInterface>(); + declarePromisedInterface<SubsetExtractionOpInterface, TransferReadOp>(); + declarePromisedInterface<SubsetInsertionOpInterface, TransferWriteOp>(); } /// Materialize a single constant operation from a given attribute value with diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll index a059425..448b0eb 100644 --- a/mlir/test/Target/LLVMIR/Import/basic.ll +++ b/mlir/test/Target/LLVMIR/Import/basic.ll @@ -72,26 +72,26 @@ define i32 @useFreezeOp(i32 %x) { ; Varadic function definition %struct.va_list = type { ptr } -declare void @llvm.va_start(ptr) -declare void @llvm.va_copy(ptr, ptr) -declare void @llvm.va_end(ptr) +declare void @llvm.va_start.p0(ptr) +declare void @llvm.va_copy.p0(ptr, ptr) +declare void @llvm.va_end.p0(ptr) ; CHECK-LABEL: llvm.func @variadic_function define void @variadic_function(i32 %X, ...) { ; CHECK: %[[ALLOCA0:.+]] = llvm.alloca %{{.*}} x !llvm.struct<"struct.va_list", (ptr)> {{.*}} : (i32) -> !llvm.ptr %ap = alloca %struct.va_list ; CHECK: llvm.intr.vastart %[[ALLOCA0]] - call void @llvm.va_start(ptr %ap) + call void @llvm.va_start.p0(ptr %ap) ; CHECK: %[[ALLOCA1:.+]] = llvm.alloca %{{.*}} x !llvm.ptr {{.*}} : (i32) -> !llvm.ptr %aq = alloca ptr ; CHECK: llvm.intr.vacopy %[[ALLOCA0]] to %[[ALLOCA1]] - call void @llvm.va_copy(ptr %aq, ptr %ap) + call void @llvm.va_copy.p0(ptr %aq, ptr %ap) ; CHECK: llvm.intr.vaend %[[ALLOCA1]] - call void @llvm.va_end(ptr %aq) + call void @llvm.va_end.p0(ptr %aq) ; CHECK: llvm.intr.vaend %[[ALLOCA0]] - call void @llvm.va_end(ptr %ap) + call void @llvm.va_end.p0(ptr %ap) ; CHECK: llvm.return ret void } diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index 8556183..0cefb4f 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -599,11 +599,11 @@ define void @ushl_sat_test(i32 %0, i32 %1, 
<8 x i32> %2, <8 x i32> %3) { ; CHECK-LABEL: llvm.func @va_intrinsics_test define void @va_intrinsics_test(ptr %0, ptr %1) { ; CHECK: llvm.intr.vastart %{{.*}} - call void @llvm.va_start(ptr %0) + call void @llvm.va_start.p0(ptr %0) ; CHECK: llvm.intr.vacopy %{{.*}} to %{{.*}} - call void @llvm.va_copy(ptr %1, ptr %0) + call void @llvm.va_copy.p0(ptr %1, ptr %0) ; CHECK: llvm.intr.vaend %{{.*}} - call void @llvm.va_end(ptr %0) + call void @llvm.va_end.p0(ptr %0) ret void } @@ -1076,9 +1076,9 @@ declare ptr @llvm.stacksave.p0() declare ptr addrspace(1) @llvm.stacksave.p1() declare void @llvm.stackrestore.p0(ptr) declare void @llvm.stackrestore.p1(ptr addrspace(1)) -declare void @llvm.va_start(ptr) -declare void @llvm.va_copy(ptr, ptr) -declare void @llvm.va_end(ptr) +declare void @llvm.va_start.p0(ptr) +declare void @llvm.va_copy.p0(ptr, ptr) +declare void @llvm.va_end.p0(ptr) declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index c38c7ea..97f3793 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -2251,14 +2251,14 @@ llvm.func @vararg_function(%arg0: i32, ...) 
{ %1 = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[ALLOCA0:.+]] = alloca %struct.va_list, align 8 %2 = llvm.alloca %1 x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr - // CHECK: call void @llvm.va_start(ptr %[[ALLOCA0]]) + // CHECK: call void @llvm.va_start.p0(ptr %[[ALLOCA0]]) llvm.intr.vastart %2 : !llvm.ptr // CHECK: %[[ALLOCA1:.+]] = alloca ptr, align 8 %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr - // CHECK: call void @llvm.va_copy(ptr %[[ALLOCA1]], ptr %[[ALLOCA0]]) + // CHECK: call void @llvm.va_copy.p0(ptr %[[ALLOCA1]], ptr %[[ALLOCA0]]) llvm.intr.vacopy %2 to %4 : !llvm.ptr, !llvm.ptr - // CHECK: call void @llvm.va_end(ptr %[[ALLOCA1]]) - // CHECK: call void @llvm.va_end(ptr %[[ALLOCA0]]) + // CHECK: call void @llvm.va_end.p0(ptr %[[ALLOCA1]]) + // CHECK: call void @llvm.va_end.p0(ptr %[[ALLOCA0]]) llvm.intr.vaend %4 : !llvm.ptr llvm.intr.vaend %2 : !llvm.ptr // CHECK: ret void diff --git a/mlir/unittests/IR/InterfaceAttachmentTest.cpp b/mlir/unittests/IR/InterfaceAttachmentTest.cpp index 16de34c..58049a9 100644 --- a/mlir/unittests/IR/InterfaceAttachmentTest.cpp +++ b/mlir/unittests/IR/InterfaceAttachmentTest.cpp @@ -431,8 +431,8 @@ TEST(InterfaceAttachmentTest, PromisedInterfaces) { attr.hasPromiseOrImplementsInterface<TestExternalAttrInterface>()); // Add a promise `TestExternalAttrInterface`. 
- testDialect->declarePromisedInterface<test::SimpleAAttr, - TestExternalAttrInterface>(); + testDialect->declarePromisedInterface<TestExternalAttrInterface, + test::SimpleAAttr>(); EXPECT_TRUE( attr.hasPromiseOrImplementsInterface<TestExternalAttrInterface>()); diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index a60bdb9..a242049 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -6752,11 +6752,11 @@ void __kmp_register_library_startup(void) { int fd1 = -1; shm_name = __kmp_str_format("/%s", name); int shm_preexist = 0; - fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666); + fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600); if ((fd1 == -1) && (errno == EEXIST)) { // file didn't open because it already exists. // try opening existing file - fd1 = shm_open(shm_name, O_RDWR, 0666); + fd1 = shm_open(shm_name, O_RDWR, 0600); if (fd1 == -1) { // file didn't open KMP_WARNING(FunctionError, "Can't open SHM"); __kmp_shm_available = false; @@ -6800,11 +6800,11 @@ void __kmp_register_library_startup(void) { int fd1 = -1; temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name); int tmp_preexist = 0; - fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0666); + fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600); if ((fd1 == -1) && (errno == EEXIST)) { // file didn't open because it already exists. 
// try opening existing file - fd1 = open(temp_reg_status_file_name, O_RDWR, 0666); + fd1 = open(temp_reg_status_file_name, O_RDWR, 0600); if (fd1 == -1) { // file didn't open if (fd1 == -1) { KMP_WARNING(FunctionError, "Can't open TEMP"); __kmp_tmp_available = false; @@ -6944,7 +6944,7 @@ void __kmp_unregister_library(void) { int fd1; if (__kmp_shm_available) { shm_name = __kmp_str_format("/%s", name); - fd1 = shm_open(shm_name, O_RDONLY, 0666); + fd1 = shm_open(shm_name, O_RDONLY, 0600); if (fd1 != -1) { // File opened successfully char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0); if (data1 != MAP_FAILED) { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index e0790e0..eb0afbb 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -216,7 +216,6 @@ libc_support_library( ":__support_cpp_limits", ":__support_cpp_type_traits", ":__support_macros_attributes", - ":__support_macros_config", ":__support_macros_sanitizer", ], ) @@ -383,7 +382,6 @@ libc_support_library( ], deps = [ ":__support_macros_attributes", - ":__support_macros_config", ":__support_macros_properties_types", ":llvm_libc_macros_stdfix_macros", ], @@ -663,7 +661,6 @@ libc_support_library( ":__support_cpp_limits", ":__support_cpp_type_traits", ":__support_macros_attributes", - ":__support_macros_config", ], ) @@ -2318,7 +2315,6 @@ libc_support_library( ":__support_cpp_cstddef", ":__support_cpp_type_traits", ":__support_macros_attributes", - ":__support_macros_config", ":__support_macros_optimization", ":__support_macros_properties_architectures", ":__support_macros_properties_cpu_features", diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 0e658353..3c3e17bf 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ 
-292,6 +292,10 @@ cc_library( "-ldl", "-lm", ], + "@platforms//os:macos": [ + "-pthread", + "-ldl", + ], "//conditions:default": [ "-pthread", "-ldl", |