diff options
101 files changed, 3262 insertions, 1198 deletions
diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp index 1041355..742d85b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp @@ -21,12 +21,12 @@ static constexpr int UnsignedASCIIUpperBound = 127; SignedCharMisuseCheck::SignedCharMisuseCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), - CharTypdefsToIgnoreList(Options.get("CharTypdefsToIgnore", "")), + CharTypedefsToIgnoreList(Options.get("CharTypedefsToIgnore", "")), DiagnoseSignedUnsignedCharComparisons( Options.get("DiagnoseSignedUnsignedCharComparisons", true)) {} void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList); + Options.store(Opts, "CharTypedefsToIgnore", CharTypedefsToIgnoreList); Options.store(Opts, "DiagnoseSignedUnsignedCharComparisons", DiagnoseSignedUnsignedCharComparisons); } @@ -39,7 +39,7 @@ BindableMatcher<clang::Stmt> SignedCharMisuseCheck::charCastExpression( // (e.g. typedef char sal_Int8). In this case, we don't need to // worry about the misinterpretation of char values. const auto IntTypedef = qualType(hasDeclaration(typedefDecl( - hasAnyName(utils::options::parseStringList(CharTypdefsToIgnoreList))))); + hasAnyName(utils::options::parseStringList(CharTypedefsToIgnoreList))))); auto CharTypeExpr = expr(); if (IsSigned) { diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h index 56504e5..c6d9f29 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h @@ -35,7 +35,7 @@ private: const ast_matchers::internal::Matcher<clang::QualType> &IntegerType, const std::string &CastBindName) const; - const StringRef CharTypdefsToIgnoreList; + const StringRef CharTypedefsToIgnoreList; const bool DiagnoseSignedUnsignedCharComparisons; }; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 62e1987..7e836a7 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -54,12 +54,17 @@ Potentially Breaking Changes :program:`clang-tidy-20`. Users should use the check-specific options of the same name instead. -- Renamed :program:`clang-tidy`'s option name of check - :doc:`bugprone-easily-swappable-parameters - <clang-tidy/checks/bugprone/easily-swappable-parameters>` from - ``NamePrefixSuffixSilenceDissimilarityTreshold`` to - ``NamePrefixSuffixSilenceDissimilarityThreshold``, - correcting a spelling mistake. +- Renamed a few :program:`clang-tidy` check options, as they + were misspelled: + + - `NamePrefixSuffixSilenceDissimilarityTreshold` to + `NamePrefixSuffixSilenceDissimilarityThreshold` in + :doc:`bugprone-easily-swappable-parameters + <clang-tidy/checks/bugprone/easily-swappable-parameters>` + + - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in + :doc:`bugprone-signed-char-misuse + <clang-tidy/checks/bugprone/signed-char-misuse>` Improvements to clangd ---------------------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst index 4edbad5..3e06e11 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst @@ -107,7 +107,7 @@ so both arguments will have the same type. Options ------- -.. option:: CharTypdefsToIgnore +.. option:: CharTypedefsToIgnore A semicolon-separated list of typedef names. In this list, we can list typedefs for ``char`` or ``signed char``, which will be ignored by the diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/signed-char-misuse-with-option.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/signed-char-misuse-with-option.cpp index 9f9d61a..c11be94 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/signed-char-misuse-with-option.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/signed-char-misuse-with-option.cpp @@ -1,6 +1,6 @@ // RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t \ // RUN: -config='{CheckOptions: \ -// RUN: {bugprone-signed-char-misuse.CharTypdefsToIgnore: "sal_Int8;int8_t"}}' \ +// RUN: {bugprone-signed-char-misuse.CharTypedefsToIgnore: "sal_Int8;int8_t"}}' \ // RUN: -- /////////////////////////////////////////////////////////////////// diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 8a5bf03..93d81e3 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_CIR_DIALECT_BUILDER_CIRBASEBUILDER_H #include "clang/AST/CharUnits.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" @@ -129,8 +130,30 @@ public: return cir::PointerType::get(ty); } - cir::PointerType getVoidPtrTy() { - return getPointerTo(cir::VoidType::get(getContext())); + cir::PointerType getPointerTo(mlir::Type ty, cir::TargetAddressSpaceAttr as) { + return cir::PointerType::get(ty, as); + } + + cir::PointerType getPointerTo(mlir::Type ty, clang::LangAS langAS) { + if (langAS == clang::LangAS::Default) // Default address space. + return getPointerTo(ty); + + if (clang::isTargetAddressSpace(langAS)) { + unsigned addrSpace = clang::toTargetAddressSpace(langAS); + auto asAttr = cir::TargetAddressSpaceAttr::get( + getContext(), getUI32IntegerAttr(addrSpace)); + return getPointerTo(ty, asAttr); + } + + llvm_unreachable("language-specific address spaces NYI"); + } + + cir::PointerType getVoidPtrTy(clang::LangAS langAS = clang::LangAS::Default) { + return getPointerTo(cir::VoidType::get(getContext()), langAS); + } + + cir::PointerType getVoidPtrTy(cir::TargetAddressSpaceAttr as) { + return getPointerTo(cir::VoidType::get(getContext()), as); } cir::BoolAttr getCIRBoolAttr(bool state) { diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h index 925a9a8..03a6a97 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h @@ -15,6 +15,7 @@ #include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index f8358de..7714750 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -602,6 +602,33 @@ def CIR_VTableAttr : CIR_Attr<"VTable", "vtable", [TypedAttrInterface]> { } //===----------------------------------------------------------------------===// +// TargetAddressSpaceAttr +//===----------------------------------------------------------------------===// + +def CIR_TargetAddressSpaceAttr : CIR_Attr< "TargetAddressSpace", + "target_address_space"> { + let summary = "Represents a target-specific numeric address space"; + let description = [{ + The TargetAddressSpaceAttr represents a target-specific numeric address space, + corresponding to the LLVM IR `addressspace` qualifier and the clang + `address_space` attribute. + + A value of zero represents the default address space. The semantics of non-zero + address spaces are target-specific. + + Example: + ```mlir + // Target-specific numeric address spaces + !cir.ptr<!s32i, addrspace(target<1>)> + !cir.ptr<!s32i, addrspace(target<10>)> + ``` + }]; + + let parameters = (ins "mlir::IntegerAttr":$value); + let assemblyFormat = "`<` `target` `<` $value `>` `>`"; +} + +//===----------------------------------------------------------------------===// // ConstComplexAttr //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.h b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h index bfa165c..45f646f 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.h +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h @@ -16,6 +16,9 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Types.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "clang/Basic/AddressSpaces.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "clang/CIR/Interfaces/CIRTypeInterfaces.h" namespace cir { diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index 4eec34c..3131847 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -14,10 +14,12 @@ #define CLANG_CIR_DIALECT_IR_CIRTYPES_TD include "clang/CIR/Dialect/IR/CIRDialect.td" +include "clang/CIR/Dialect/IR/CIREnumAttr.td" include "clang/CIR/Dialect/IR/CIRTypeConstraints.td" include "clang/CIR/Interfaces/CIRTypeInterfaces.td" include "mlir/Interfaces/DataLayoutInterfaces.td" include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/EnumAttr.td" //===----------------------------------------------------------------------===// // CIR Types @@ -226,32 +228,54 @@ def CIR_PointerType : CIR_Type<"Pointer", "ptr", [ ]> { let summary = "CIR pointer type"; let description = [{ - The `!cir.ptr` type represents C and C++ pointer types and C++ reference - types, other than pointers-to-members. The `pointee` type is the type - pointed to. + The `!cir.ptr` type is a typed pointer type. It is used to represent + pointers to objects in C/C++. The type of the pointed-to object is given by + the `pointee` parameter. The `addrSpace` parameter is an optional address + space attribute that specifies the address space of the pointer. If not + specified, the pointer is assumed to be in the default address space. - TODO(CIR): The address space attribute is not yet implemented. + The `!cir.ptr` type can point to any type, including fundamental types, + records, arrays, vectors, functions, and other pointers. It can also point + to incomplete types, such as incomplete records. + + Examples: + + ```mlir + !cir.ptr<!cir.int<u, 8>> + !cir.ptr<!cir.float> + !cir.ptr<!cir.record<struct "MyStruct">> + !cir.ptr<!cir.int<u, 8>, target_address_space(1)> + !cir.ptr<!cir.record<struct "MyStruct">, target_address_space(5)> + ``` }]; - let parameters = (ins "mlir::Type":$pointee); + let parameters = (ins + "mlir::Type":$pointee, + OptionalParameter< + "cir::TargetAddressSpaceAttr">:$addrSpace + ); + let skipDefaultBuilders = 1; let builders = [ - TypeBuilderWithInferredContext<(ins "mlir::Type":$pointee), [{ - return $_get(pointee.getContext(), pointee); + TypeBuilderWithInferredContext<(ins + "mlir::Type":$pointee, + CArg<"cir::TargetAddressSpaceAttr", "nullptr">:$addrSpace), [{ + return $_get(pointee.getContext(), pointee, addrSpace); }]>, - TypeBuilder<(ins "mlir::Type":$pointee), [{ - return $_get($_ctxt, pointee); + TypeBuilder<(ins + "mlir::Type":$pointee, + CArg<"cir::TargetAddressSpaceAttr", "nullptr">:$addrSpace), [{ + return $_get($_ctxt, pointee, addrSpace); }]> ]; let assemblyFormat = [{ - `<` $pointee `>` + `<` + $pointee + ( `,` ` ` custom<TargetAddressSpace>($addrSpace)^ )? + `>` }]; - let genVerifyDecl = 1; - - let skipDefaultBuilders = 1; - let extraClassDeclaration = [{ template <typename ...Types> bool isPtrTo() const { diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 0e7cec4..f7ca276 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -216,6 +216,7 @@ struct MissingFeatures { static bool dataLayoutTypeIsSized() { return false; } static bool dataLayoutTypeAllocSize() { return false; } static bool dataLayoutTypeStoreSize() { return false; } + static bool dataLayoutPtrHandlingBasedOnLangAS() { return false; } static bool deferredCXXGlobalInit() { return false; } static bool deleteArray() { return false; } static bool devirtualizeMemberFunction() { return false; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2ef6098..5a48f0b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1258,8 +1258,9 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">, HelpText<"Compression level for offload device binaries (HIP only)">; def offload_jobs_EQ : Joined<["--"], "offload-jobs=">, - HelpText<"Specify the number of threads to use for device offloading tasks" - " during compilation.">; + HelpText<"Specify the number of threads to use for device offloading tasks " + "during compilation. Can be a positive integer or the string " + "'jobserver' to use the make-style jobserver from the environment.">; defm offload_via_llvm : BoolFOption<"offload-via-llvm", LangOpts<"OffloadViaLLVM">, DefaultFalse, diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index b4c8924..be94890 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -2053,8 +2053,8 @@ mlir::Value CIRGenFunction::emitAlloca(StringRef name, mlir::Type ty, // CIR uses its own alloca address space rather than follow the target data // layout like original CodeGen. The data layout awareness should be done in // the lowering pass instead. - assert(!cir::MissingFeatures::addressSpace()); - cir::PointerType localVarPtrTy = builder.getPointerTo(ty); + cir::PointerType localVarPtrTy = + builder.getPointerTo(ty, getCIRAllocaAddressSpace()); mlir::IntegerAttr alignIntAttr = cgm.getSize(alignment); mlir::Value addr; diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 8485564..910c8a9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -76,6 +76,7 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, SInt128Ty = cir::IntType::get(&getMLIRContext(), 128, /*isSigned=*/true); UInt8Ty = cir::IntType::get(&getMLIRContext(), 8, /*isSigned=*/false); UInt8PtrTy = cir::PointerType::get(UInt8Ty); + cirAllocaAddressSpace = getTargetCIRGenInfo().getCIRAllocaAddressSpace(); UInt16Ty = cir::IntType::get(&getMLIRContext(), 16, /*isSigned=*/false); UInt32Ty = cir::IntType::get(&getMLIRContext(), 32, /*isSigned=*/false); UInt64Ty = cir::IntType::get(&getMLIRContext(), 64, /*isSigned=*/false); diff --git a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h index cc3ce09..273ec7f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h +++ b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_LIB_CIR_CIRGENTYPECACHE_H #include "clang/AST/CharUnits.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" namespace clang::CIRGen { @@ -73,6 +74,8 @@ struct CIRGenTypeCache { /// The alignment of size_t. unsigned char SizeAlignInBytes; + cir::TargetAddressSpaceAttr cirAllocaAddressSpace; + clang::CharUnits getSizeAlign() const { return clang::CharUnits::fromQuantity(SizeAlignInBytes); } @@ -80,6 +83,10 @@ struct CIRGenTypeCache { clang::CharUnits getPointerAlign() const { return clang::CharUnits::fromQuantity(PointerAlignInBytes); } + + cir::TargetAddressSpaceAttr getCIRAllocaAddressSpace() const { + return cirAllocaAddressSpace; + } }; } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index bb24933..e65896a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -417,7 +417,7 @@ mlir::Type CIRGenTypes::convertType(QualType type) { mlir::Type pointeeType = convertType(elemTy); - resultType = builder.getPointerTo(pointeeType); + resultType = builder.getPointerTo(pointeeType, elemTy.getAddressSpace()); break; } diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index a5c548a..dbb0312 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -16,6 +16,7 @@ #include "ABIInfo.h" #include "CIRGenTypes.h" +#include "clang/Basic/AddressSpaces.h" #include <memory> #include <utility> @@ -43,6 +44,11 @@ public: /// Returns ABI info helper for the target. const ABIInfo &getABIInfo() const { return *info; } + /// Get the address space for alloca. + virtual cir::TargetAddressSpaceAttr getCIRAllocaAddressSpace() const { + return {}; + } + /// Determine whether a call to an unprototyped functions under /// the given calling convention should use the variadic /// convention or the non-variadic convention. diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index 95faad6..3484c59 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -43,6 +43,16 @@ parseFloatLiteral(mlir::AsmParser &parser, mlir::FailureOr<llvm::APFloat> &value, cir::FPTypeInterface fpType); +//===----------------------------------------------------------------------===// +// AddressSpaceAttr +//===----------------------------------------------------------------------===// + +mlir::ParseResult parseTargetAddressSpace(mlir::AsmParser &p, + cir::TargetAddressSpaceAttr &attr); + +void printTargetAddressSpace(mlir::AsmPrinter &p, + cir::TargetAddressSpaceAttr attr); + static mlir::ParseResult parseConstPtr(mlir::AsmParser &parser, mlir::IntegerAttr &value); diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 35b4513..58973528 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -13,6 +13,7 @@ #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "mlir/IR/DialectImplementation.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypesDetails.h" #include "clang/CIR/MissingFeatures.h" @@ -38,6 +39,27 @@ parseFuncTypeParams(mlir::AsmParser &p, llvm::SmallVector<mlir::Type> ¶ms, static void printFuncTypeParams(mlir::AsmPrinter &p, mlir::ArrayRef<mlir::Type> params, bool isVarArg); +//===----------------------------------------------------------------------===// +// CIR Custom Parser/Printer Signatures +//===----------------------------------------------------------------------===// + +static mlir::ParseResult +parseFuncTypeParams(mlir::AsmParser &p, llvm::SmallVector<mlir::Type> ¶ms, + bool &isVarArg); + +static void printFuncTypeParams(mlir::AsmPrinter &p, + mlir::ArrayRef<mlir::Type> params, + bool isVarArg); + +//===----------------------------------------------------------------------===// +// AddressSpace +//===----------------------------------------------------------------------===// + +mlir::ParseResult parseTargetAddressSpace(mlir::AsmParser &p, + cir::TargetAddressSpaceAttr &attr); + +void printTargetAddressSpace(mlir::AsmPrinter &p, + cir::TargetAddressSpaceAttr attr); //===----------------------------------------------------------------------===// // Get autogenerated stuff @@ -298,6 +320,22 @@ bool RecordType::isLayoutIdentical(const RecordType &other) { //===----------------------------------------------------------------------===// llvm::TypeSize +PointerType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: improve this in face of address spaces + assert(!cir::MissingFeatures::dataLayoutPtrHandlingBasedOnLangAS()); + return llvm::TypeSize::getFixed(64); +} + +uint64_t +PointerType::getABIAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: improve this in face of address spaces + assert(!cir::MissingFeatures::dataLayoutPtrHandlingBasedOnLangAS()); + return 8; +} + +llvm::TypeSize RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, mlir::DataLayoutEntryListRef params) const { if (isUnion()) @@ -766,30 +804,39 @@ mlir::LogicalResult cir::VectorType::verify( } //===----------------------------------------------------------------------===// -// PointerType Definitions +// TargetAddressSpace definitions //===----------------------------------------------------------------------===// -llvm::TypeSize -PointerType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, - ::mlir::DataLayoutEntryListRef params) const { - // FIXME: improve this in face of address spaces - return llvm::TypeSize::getFixed(64); -} +mlir::ParseResult parseTargetAddressSpace(mlir::AsmParser &p, + cir::TargetAddressSpaceAttr &attr) { + if (failed(p.parseKeyword("target_address_space"))) + return mlir::failure(); -uint64_t -PointerType::getABIAlignment(const ::mlir::DataLayout &dataLayout, - ::mlir::DataLayoutEntryListRef params) const { - // FIXME: improve this in face of address spaces - return 8; -} + if (failed(p.parseLParen())) + return mlir::failure(); -mlir::LogicalResult -PointerType::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError, - mlir::Type pointee) { - // TODO(CIR): Verification of the address space goes here. + int32_t targetValue; + if (failed(p.parseInteger(targetValue))) + return p.emitError(p.getCurrentLocation(), + "expected integer address space value"); + + if (failed(p.parseRParen())) + return p.emitError(p.getCurrentLocation(), + "expected ')' after address space value"); + + mlir::MLIRContext *context = p.getBuilder().getContext(); + attr = cir::TargetAddressSpaceAttr::get( + context, p.getBuilder().getUI32IntegerAttr(targetValue)); return mlir::success(); } +// The custom printer for the `addrspace` parameter in `!cir.ptr`. +// in the format of `target_address_space(N)`. +void printTargetAddressSpace(mlir::AsmPrinter &p, + cir::TargetAddressSpaceAttr attr) { + p << "target_address_space(" << attr.getValue().getUInt() << ")"; +} + //===----------------------------------------------------------------------===// // CIR Dialect //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 4bc7783..bfb1262 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -32,6 +32,7 @@ #include "mlir/Transforms/DialectConversion.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Dialect/Passes.h" #include "clang/CIR/LoweringHelpers.h" #include "clang/CIR/MissingFeatures.h" @@ -2308,11 +2309,9 @@ mlir::LogicalResult CIRToLLVMSelectOpLowering::matchAndRewrite( static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, mlir::DataLayout &dataLayout) { converter.addConversion([&](cir::PointerType type) -> mlir::Type { - // Drop pointee type since LLVM dialect only allows opaque pointers. - assert(!cir::MissingFeatures::addressSpace()); - unsigned targetAS = 0; - - return mlir::LLVM::LLVMPointerType::get(type.getContext(), targetAS); + unsigned addrSpace = + type.getAddrSpace() ? type.getAddrSpace().getValue().getUInt() : 0; + return mlir::LLVM::LLVMPointerType::get(type.getContext(), addrSpace); }); converter.addConversion([&](cir::VPtrType type) -> mlir::Type { assert(!cir::MissingFeatures::addressSpace()); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 412a176..684cc09 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9224,14 +9224,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, addOffloadCompressArgs(Args, CmdArgs); if (Arg *A = Args.getLastArg(options::OPT_offload_jobs_EQ)) { - int NumThreads; - if (StringRef(A->getValue()).getAsInteger(10, NumThreads) || - NumThreads <= 0) - C.getDriver().Diag(diag::err_drv_invalid_int_value) - << A->getAsString(Args) << A->getValue(); - else - CmdArgs.push_back( - Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads))); + StringRef Val = A->getValue(); + + if (Val.equals_insensitive("jobserver")) + CmdArgs.push_back(Args.MakeArgString("--wrapper-jobs=jobserver")); + else { + int NumThreads; + if (Val.getAsInteger(10, NumThreads) || NumThreads <= 0) { + C.getDriver().Diag(diag::err_drv_invalid_int_value) + << A->getAsString(Args) << Val; + } else { + CmdArgs.push_back( + Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads))); + } + } } const char *Exec = diff --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt index 53a2728..76bdc50 100644 --- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt +++ b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt @@ -24,6 +24,5 @@ add_clang_library(clangDependencyScanning clangFrontend clangLex clangSerialization - clangTooling ${LLVM_PTHREAD_LIB} ) diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp index 010380d..e1f4d0d 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp @@ -9,8 +9,10 @@ #include "DependencyScannerImpl.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/DiagnosticSerialization.h" +#include "clang/Driver/Driver.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "llvm/TargetParser/Host.h" using namespace clang; using namespace tooling; @@ -332,11 +334,9 @@ public: return DepFS->getDirectiveTokens(File.getName()); } }; -} // namespace /// Sanitize diagnostic options for dependency scan. -void clang::tooling::dependencies::sanitizeDiagOpts( - DiagnosticOptions &DiagOpts) { +void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) { // Don't print 'X warnings and Y errors generated'. DiagOpts.ShowCarets = false; // Don't write out diagnostic file. @@ -355,44 +355,146 @@ void clang::tooling::dependencies::sanitizeDiagOpts( .Default(true); }); } +} // namespace -bool DependencyScanningAction::runInvocation( - std::shared_ptr<CompilerInvocation> Invocation, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, - std::shared_ptr<PCHContainerOperations> PCHContainerOps, - DiagnosticConsumer *DiagConsumer) { - // Making sure that we canonicalize the defines before we create the deep - // copy to avoid unnecessary variants in the scanner and in the resulting - // explicit command lines. - if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros)) - canonicalizeDefines(Invocation->getPreprocessorOpts()); +namespace clang::tooling::dependencies { +std::unique_ptr<DiagnosticOptions> +createDiagOptions(ArrayRef<std::string> CommandLine) { + std::vector<const char *> CLI; + for (const std::string &Arg : CommandLine) + CLI.push_back(Arg.c_str()); + auto DiagOpts = CreateAndPopulateDiagOpts(CLI); + sanitizeDiagOpts(*DiagOpts); + return DiagOpts; +} - // Make a deep copy of the original Clang invocation. - CompilerInvocation OriginalInvocation(*Invocation); +DignosticsEngineWithDiagOpts::DignosticsEngineWithDiagOpts( + ArrayRef<std::string> CommandLine, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, DiagnosticConsumer &DC) { + std::vector<const char *> CCommandLine(CommandLine.size(), nullptr); + llvm::transform(CommandLine, CCommandLine.begin(), + [](const std::string &Str) { return Str.c_str(); }); + DiagOpts = CreateAndPopulateDiagOpts(CCommandLine); + sanitizeDiagOpts(*DiagOpts); + DiagEngine = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC, + /*ShouldOwnClient=*/false); +} - if (Scanned) { - // Scanning runs once for the first -cc1 invocation in a chain of driver - // jobs. For any dependent jobs, reuse the scanning result and just - // update the LastCC1Arguments to correspond to the new invocation. - // FIXME: to support multi-arch builds, each arch requires a separate scan - setLastCC1Arguments(std::move(OriginalInvocation)); - return true; +std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>> +buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) { + SmallVector<const char *, 256> Argv; + Argv.reserve(ArgStrs.size()); + for (const std::string &Arg : ArgStrs) + Argv.push_back(Arg.c_str()); + + std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>( + Argv[0], llvm::sys::getDefaultTargetTriple(), Diags, + "clang LLVM compiler", FS); + Driver->setTitle("clang_based_tool"); + + llvm::BumpPtrAllocator Alloc; + bool CLMode = driver::IsClangCL( + driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1))); + + if (llvm::Error E = + driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) { + Diags.Report(diag::err_drv_expand_response_file) + << llvm::toString(std::move(E)); + return std::make_pair(nullptr, nullptr); } - Scanned = true; + std::unique_ptr<driver::Compilation> Compilation( + Driver->BuildCompilation(Argv)); + if (!Compilation) + return std::make_pair(nullptr, nullptr); - // Create a compiler instance to handle the actual work. - auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries()); - ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps), - ModCache.get()); - CompilerInstance &ScanInstance = *ScanInstanceStorage; + if (Compilation->containsError()) + return std::make_pair(nullptr, nullptr); + + return std::make_pair(std::move(Driver), std::move(Compilation)); +} + +std::unique_ptr<CompilerInvocation> +createCompilerInvocation(ArrayRef<std::string> CommandLine, + DiagnosticsEngine &Diags) { + llvm::opt::ArgStringList Argv; + for (const std::string &Str : ArrayRef(CommandLine).drop_front()) + Argv.push_back(Str.c_str()); + + auto Invocation = std::make_unique<CompilerInvocation>(); + if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) { + // FIXME: Should we just go on like cc1_main does? + return nullptr; + } + return Invocation; +} + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, + llvm::MemoryBufferRef TUBuffer) { + // Reset what might have been modified in the previous worker invocation. + BaseFS->setCurrentWorkingDirectory(WorkingDirectory); + + IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS; + auto OverlayFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); + auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); + InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); + auto InputPath = TUBuffer.getBufferIdentifier(); + InMemoryFS->addFile( + InputPath, 0, llvm::MemoryBuffer::getMemBufferCopy(TUBuffer.getBuffer())); + IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; + + OverlayFS->pushOverlay(InMemoryOverlay); + ModifiedFS = OverlayFS; + std::vector<std::string> ModifiedCommandLine(CommandLine); + ModifiedCommandLine.emplace_back(InputPath); + + return std::make_pair(ModifiedFS, ModifiedCommandLine); +} + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, StringRef ModuleName) { + // Reset what might have been modified in the previous worker invocation. + BaseFS->setCurrentWorkingDirectory(WorkingDirectory); + + // If we're scanning based on a module name alone, we don't expect the client + // to provide us with an input file. However, the driver really wants to have + // one. Let's just make it up to make the driver happy. + auto OverlayFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); + auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); + InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); + SmallString<128> FakeInputPath; + // TODO: We should retry the creation if the path already exists. + llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath, + /*MakeAbsolute=*/false); + InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer("")); + IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; + OverlayFS->pushOverlay(InMemoryOverlay); + + std::vector<std::string> ModifiedCommandLine(CommandLine); + ModifiedCommandLine.emplace_back(FakeInputPath); + + return std::make_pair(OverlayFS, ModifiedCommandLine); +} + +bool initializeScanCompilerInstance( + CompilerInstance &ScanInstance, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service, + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS) { ScanInstance.setBuildingModule(false); ScanInstance.createVirtualFileSystem(FS, DiagConsumer); // Create the compiler's actual diagnostics engine. sanitizeDiagOpts(ScanInstance.getDiagnosticOpts()); - assert(!DiagConsumerFinished && "attempt to reuse finished consumer"); ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false); if (!ScanInstance.hasDiagnostics()) return false; @@ -435,6 +537,26 @@ bool DependencyScanningAction::runInvocation( ScanInstance.createSourceManager(); + // Consider different header search and diagnostic options to create + // different modules. This avoids the unsound aliasing of module PCMs. + // + // TODO: Implement diagnostic bucketing to reduce the impact of strict + // context hashing. + ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true; + ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true; + ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true; + ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true; + ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true; + ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false; + + // Avoid some checks and module map parsing when loading PCM files. + ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false; + + return true; +} + +llvm::SmallVector<StringRef> +getInitialStableDirs(const CompilerInstance &ScanInstance) { // Create a collection of stable directories derived from the ScanInstance // for determining whether module dependencies would fully resolve from // those directories. @@ -442,7 +564,12 @@ bool DependencyScanningAction::runInvocation( const StringRef Sysroot = ScanInstance.getHeaderSearchOpts().Sysroot; if (!Sysroot.empty() && (llvm::sys::path::root_directory(Sysroot) != Sysroot)) StableDirs = {Sysroot, ScanInstance.getHeaderSearchOpts().ResourceDir}; + return StableDirs; +} +std::optional<PrebuiltModulesAttrsMap> +computePrebuiltModulesASTMap(CompilerInstance &ScanInstance, + llvm::SmallVector<StringRef> &StableDirs) { // Store a mapping of prebuilt module files and their properties like header // search options. This will prevent the implicit build to create duplicate // modules and will force reuse of the existing prebuilt module files @@ -454,12 +581,14 @@ bool DependencyScanningAction::runInvocation( ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance, ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles, PrebuiltModulesASTMap, ScanInstance.getDiagnostics(), StableDirs)) - return false; + return {}; - // Create the dependency collector that will collect the produced - // dependencies. - // - // This also moves the existing dependency output options from the + return PrebuiltModulesASTMap; +} + +std::unique_ptr<DependencyOutputOptions> +takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance) { + // This function moves the existing dependency output options from the // invocation to the collector. The options in the invocation are reset, // which ensures that the compiler won't create new dependency collectors, // and thus won't write out the extra '.d' files to disk. @@ -472,35 +601,85 @@ bool DependencyScanningAction::runInvocation( ScanInstance.getFrontendOpts().Inputs)}; Opts->IncludeSystemHeaders = true; + return Opts; +} + +std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector( + CompilerInstance &ScanInstance, + std::unique_ptr<DependencyOutputOptions> DepOutputOpts, + StringRef WorkingDirectory, DependencyConsumer &Consumer, + DependencyScanningService &Service, CompilerInvocation &Inv, + DependencyActionController &Controller, + PrebuiltModulesAttrsMap PrebuiltModulesASTMap, + llvm::SmallVector<StringRef> &StableDirs) { + std::shared_ptr<ModuleDepCollector> MDC; switch (Service.getFormat()) { case ScanningOutputFormat::Make: ScanInstance.addDependencyCollector( std::make_shared<DependencyConsumerForwarder>( - std::move(Opts), WorkingDirectory, Consumer)); + std::move(DepOutputOpts), WorkingDirectory, Consumer)); break; case ScanningOutputFormat::P1689: case ScanningOutputFormat::Full: MDC = std::make_shared<ModuleDepCollector>( - Service, std::move(Opts), ScanInstance, Consumer, Controller, - OriginalInvocation, std::move(PrebuiltModulesASTMap), StableDirs); + Service, std::move(DepOutputOpts), ScanInstance, Consumer, Controller, + Inv, std::move(PrebuiltModulesASTMap), StableDirs); ScanInstance.addDependencyCollector(MDC); break; } - // Consider different header search and diagnostic options to create - // different modules. This avoids the unsound aliasing of module PCMs. - // - // TODO: Implement diagnostic bucketing to reduce the impact of strict - // context hashing. - ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true; - ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true; - ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true; - ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true; - ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true; - ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false; + return MDC; +} +} // namespace clang::tooling::dependencies - // Avoid some checks and module map parsing when loading PCM files. - ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false; +bool DependencyScanningAction::runInvocation( + std::unique_ptr<CompilerInvocation> Invocation, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + std::shared_ptr<PCHContainerOperations> PCHContainerOps, + DiagnosticConsumer *DiagConsumer) { + // Making sure that we canonicalize the defines before we create the deep + // copy to avoid unnecessary variants in the scanner and in the resulting + // explicit command lines. + if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros)) + canonicalizeDefines(Invocation->getPreprocessorOpts()); + + // Make a deep copy of the original Clang invocation. + CompilerInvocation OriginalInvocation(*Invocation); + + if (Scanned) { + // Scanning runs once for the first -cc1 invocation in a chain of driver + // jobs. For any dependent jobs, reuse the scanning result and just + // update the LastCC1Arguments to correspond to the new invocation. + // FIXME: to support multi-arch builds, each arch requires a separate scan + setLastCC1Arguments(std::move(OriginalInvocation)); + return true; + } + + Scanned = true; + + // Create a compiler instance to handle the actual work. + auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries()); + ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps), + ModCache.get()); + CompilerInstance &ScanInstance = *ScanInstanceStorage; + + assert(!DiagConsumerFinished && "attempt to reuse finished consumer"); + if (!initializeScanCompilerInstance(ScanInstance, FS, DiagConsumer, Service, + DepFS)) + return false; + + llvm::SmallVector<StringRef> StableDirs = getInitialStableDirs(ScanInstance); + auto MaybePrebuiltModulesASTMap = + computePrebuiltModulesASTMap(ScanInstance, StableDirs); + if (!MaybePrebuiltModulesASTMap) + return false; + + auto DepOutputOpts = takeDependencyOutputOptionsFrom(ScanInstance); + + MDC = initializeScanInstanceDependencyCollector( + ScanInstance, std::move(DepOutputOpts), WorkingDirectory, Consumer, + Service, OriginalInvocation, Controller, *MaybePrebuiltModulesASTMap, + StableDirs); std::unique_ptr<FrontendAction> Action; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h index 32fbcff..71c6731 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h @@ -9,8 +9,10 @@ #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H +#include "clang/Driver/Compilation.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Serialization/ObjectFilePCHContainerReader.h" #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" @@ -30,12 +32,12 @@ public: DependencyScanningAction( DependencyScanningService &Service, StringRef WorkingDirectory, DependencyConsumer &Consumer, DependencyActionController &Controller, - llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS, + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS, std::optional<StringRef> ModuleName = std::nullopt) : Service(Service), WorkingDirectory(WorkingDirectory), Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)), ModuleName(ModuleName) {} - bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation, + bool runInvocation(std::unique_ptr<CompilerInvocation> Invocation, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, std::shared_ptr<PCHContainerOperations> PCHContainerOps, DiagnosticConsumer *DiagConsumer); @@ -63,7 +65,7 @@ private: StringRef WorkingDirectory; DependencyConsumer &Consumer; DependencyActionController &Controller; - llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS; + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS; std::optional<StringRef> ModuleName; std::optional<CompilerInstance> ScanInstanceStorage; std::shared_ptr<ModuleDepCollector> MDC; @@ -72,9 +74,81 @@ private: bool DiagConsumerFinished = false; }; -// Helper functions -void sanitizeDiagOpts(DiagnosticOptions &DiagOpts); +// Helper functions and data types. +std::unique_ptr<DiagnosticOptions> +createDiagOptions(ArrayRef<std::string> CommandLine); +struct DignosticsEngineWithDiagOpts { + // We need to bound the lifetime of the DiagOpts used to create the + // DiganosticsEngine with the DiagnosticsEngine itself. + std::unique_ptr<DiagnosticOptions> DiagOpts; + IntrusiveRefCntPtr<DiagnosticsEngine> DiagEngine; + + DignosticsEngineWithDiagOpts(ArrayRef<std::string> CommandLine, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + DiagnosticConsumer &DC); +}; + +struct TextDiagnosticsPrinterWithOutput { + // We need to bound the lifetime of the data that supports the DiagPrinter + // with it together so they have the same lifetime. + std::string DiagnosticOutput; + llvm::raw_string_ostream DiagnosticsOS; + std::unique_ptr<DiagnosticOptions> DiagOpts; + TextDiagnosticPrinter DiagPrinter; + + TextDiagnosticsPrinterWithOutput(ArrayRef<std::string> CommandLine) + : DiagnosticsOS(DiagnosticOutput), + DiagOpts(createDiagOptions(CommandLine)), + DiagPrinter(DiagnosticsOS, *DiagOpts) {} +}; + +std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>> +buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); + +std::unique_ptr<CompilerInvocation> +createCompilerInvocation(ArrayRef<std::string> CommandLine, + DiagnosticsEngine &Diags); + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, + llvm::MemoryBufferRef TUBuffer); + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, StringRef ModuleName); + +bool initializeScanCompilerInstance( + CompilerInstance &ScanInstance, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service, + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS); + +SmallVector<StringRef> +getInitialStableDirs(const CompilerInstance &ScanInstance); + +std::optional<PrebuiltModulesAttrsMap> +computePrebuiltModulesASTMap(CompilerInstance &ScanInstance, + SmallVector<StringRef> &StableDirs); + +std::unique_ptr<DependencyOutputOptions> +takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance); + +/// Create the dependency collector that will collect the produced +/// dependencies. May return the created ModuleDepCollector depending +/// on the scanning format. +std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector( + CompilerInstance &ScanInstance, + std::unique_ptr<DependencyOutputOptions> DepOutputOpts, + StringRef WorkingDirectory, DependencyConsumer &Consumer, + DependencyScanningService &Service, CompilerInvocation &Inv, + DependencyActionController &Controller, + PrebuiltModulesAttrsMap PrebuiltModulesASTMap, + llvm::SmallVector<StringRef> &StableDirs); } // namespace dependencies } // namespace tooling } // namespace clang diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index 796e587..9515421 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -8,29 +8,9 @@ #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "DependencyScannerImpl.h" -#include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticFrontend.h" -#include "clang/Basic/DiagnosticSerialization.h" -#include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Job.h" #include "clang/Driver/Tool.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "clang/Serialization/ObjectFilePCHContainerReader.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h" -#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/TargetParser/Host.h" -#include <optional> using namespace clang; using namespace tooling; @@ -63,32 +43,19 @@ DependencyScanningWorker::DependencyScanningWorker( } } -static std::unique_ptr<DiagnosticOptions> -createDiagOptions(const std::vector<std::string> &CommandLine) { - std::vector<const char *> CLI; - for (const std::string &Arg : CommandLine) - CLI.push_back(Arg.c_str()); - auto DiagOpts = CreateAndPopulateDiagOpts(CLI); - sanitizeDiagOpts(*DiagOpts); - return DiagOpts; -} - llvm::Error DependencyScanningWorker::computeDependencies( StringRef WorkingDirectory, const std::vector<std::string> &CommandLine, DependencyConsumer &Consumer, DependencyActionController &Controller, std::optional<llvm::MemoryBufferRef> TUBuffer) { // Capture the emitted diagnostics and report them to the client // in the case of a failure. - std::string DiagnosticOutput; - llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput); - auto DiagOpts = createDiagOptions(CommandLine); - TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts); + TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine); if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller, - DiagPrinter, TUBuffer)) + DiagPrinterWithOS.DiagPrinter, TUBuffer)) return llvm::Error::success(); - return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(), - llvm::inconvertibleErrorCode()); + return llvm::make_error<llvm::StringError>( + DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode()); } llvm::Error DependencyScanningWorker::computeDependencies( @@ -97,51 +64,24 @@ llvm::Error DependencyScanningWorker::computeDependencies( StringRef ModuleName) { // Capture the emitted diagnostics and report them to the client // in the case of a failure. - std::string DiagnosticOutput; - llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput); - auto DiagOpts = createDiagOptions(CommandLine); - TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts); + TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine); if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller, - DiagPrinter, ModuleName)) + DiagPrinterWithOS.DiagPrinter, ModuleName)) return llvm::Error::success(); - return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(), - llvm::inconvertibleErrorCode()); + return llvm::make_error<llvm::StringError>( + DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode()); } static bool forEachDriverJob( ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, llvm::function_ref<bool(const driver::Command &Cmd)> Callback) { - SmallVector<const char *, 256> Argv; - Argv.reserve(ArgStrs.size()); - for (const std::string &Arg : ArgStrs) - Argv.push_back(Arg.c_str()); - - std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>( - Argv[0], llvm::sys::getDefaultTargetTriple(), Diags, - "clang LLVM compiler", FS); - Driver->setTitle("clang_based_tool"); - - llvm::BumpPtrAllocator Alloc; - bool CLMode = driver::IsClangCL( - driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1))); - - if (llvm::Error E = - driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) { - Diags.Report(diag::err_drv_expand_response_file) - << llvm::toString(std::move(E)); - return false; - } - - const std::unique_ptr<driver::Compilation> Compilation( - Driver->BuildCompilation(llvm::ArrayRef(Argv))); + // Compilation holds a non-owning a reference to the Driver, hence we need to + // keep the Driver alive when we use Compilation. + auto [Driver, Compilation] = buildCompilation(ArgStrs, Diags, FS); if (!Compilation) return false; - - if (Compilation->containsError()) - return false; - for (const driver::Command &Job : Compilation->getJobs()) { if (!Callback(Job)) return false; @@ -150,30 +90,21 @@ static bool forEachDriverJob( } static bool createAndRunToolInvocation( - std::vector<std::string> CommandLine, DependencyScanningAction &Action, + const std::vector<std::string> &CommandLine, + DependencyScanningAction &Action, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps, DiagnosticsEngine &Diags, DependencyConsumer &Consumer) { - - // Save executable path before providing CommandLine to ToolInvocation - std::string Executable = CommandLine[0]; - - llvm::opt::ArgStringList Argv; - for (const std::string &Str : ArrayRef(CommandLine).drop_front()) - Argv.push_back(Str.c_str()); - - auto Invocation = std::make_shared<CompilerInvocation>(); - if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) { - // FIXME: Should we just go on like cc1_main does? + auto Invocation = createCompilerInvocation(CommandLine, Diags); + if (!Invocation) return false; - } if (!Action.runInvocation(std::move(Invocation), std::move(FS), PCHContainerOps, Diags.getClient())) return false; std::vector<std::string> Args = Action.takeLastCC1Arguments(); - Consumer.handleBuildCommand({std::move(Executable), std::move(Args)}); + Consumer.handleBuildCommand({CommandLine[0], std::move(Args)}); return true; } @@ -182,24 +113,19 @@ bool DependencyScanningWorker::scanDependencies( DependencyConsumer &Consumer, DependencyActionController &Controller, DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, std::optional<StringRef> ModuleName) { - std::vector<const char *> CCommandLine(CommandLine.size(), nullptr); - llvm::transform(CommandLine, CCommandLine.begin(), - [](const std::string &Str) { return Str.c_str(); }); - auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine); - sanitizeDiagOpts(*DiagOpts); - auto Diags = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC, - /*ShouldOwnClient=*/false); - + DignosticsEngineWithDiagOpts DiagEngineWithCmdAndOpts(CommandLine, FS, DC); DependencyScanningAction Action(Service, WorkingDirectory, Consumer, Controller, DepFS, ModuleName); bool Success = false; if (CommandLine[1] == "-cc1") { - Success = createAndRunToolInvocation(CommandLine, Action, FS, - PCHContainerOps, *Diags, Consumer); + Success = createAndRunToolInvocation( + CommandLine, Action, FS, PCHContainerOps, + *DiagEngineWithCmdAndOpts.DiagEngine, Consumer); } else { Success = forEachDriverJob( - CommandLine, *Diags, FS, [&](const driver::Command &Cmd) { + CommandLine, *DiagEngineWithCmdAndOpts.DiagEngine, FS, + [&](const driver::Command &Cmd) { if (StringRef(Cmd.getCreator().getName()) != "clang") { // Non-clang command. Just pass through to the dependency // consumer. @@ -218,13 +144,15 @@ bool DependencyScanningWorker::scanDependencies( // system to ensure that any file system requests that // are made by the driver do not go through the // dependency scanning filesystem. - return createAndRunToolInvocation(std::move(Argv), Action, FS, - PCHContainerOps, *Diags, Consumer); + return createAndRunToolInvocation( + std::move(Argv), Action, FS, PCHContainerOps, + *DiagEngineWithCmdAndOpts.DiagEngine, Consumer); }); } if (Success && !Action.hasScanned()) - Diags->Report(diag::err_fe_expected_compiler_job) + DiagEngineWithCmdAndOpts.DiagEngine->Report( + diag::err_fe_expected_compiler_job) << llvm::join(CommandLine, " "); // Ensure finish() is called even if we never reached ExecuteAction(). @@ -238,66 +166,25 @@ bool DependencyScanningWorker::computeDependencies( StringRef WorkingDirectory, const std::vector<std::string> &CommandLine, DependencyConsumer &Consumer, DependencyActionController &Controller, DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) { - // Reset what might have been modified in the previous worker invocation. - BaseFS->setCurrentWorkingDirectory(WorkingDirectory); - - std::optional<std::vector<std::string>> ModifiedCommandLine; - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS; - - // If we're scanning based on a module name alone, we don't expect the client - // to provide us with an input file. However, the driver really wants to have - // one. Let's just make it up to make the driver happy. if (TUBuffer) { - auto OverlayFS = - llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); - auto InMemoryFS = - llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); - InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); - auto InputPath = TUBuffer->getBufferIdentifier(); - InMemoryFS->addFile( - InputPath, 0, - llvm::MemoryBuffer::getMemBufferCopy(TUBuffer->getBuffer())); - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = - InMemoryFS; - - OverlayFS->pushOverlay(InMemoryOverlay); - ModifiedFS = OverlayFS; - ModifiedCommandLine = CommandLine; - ModifiedCommandLine->emplace_back(InputPath); + auto [FinalFS, FinalCommandLine] = initVFSForTUBuferScanning( + BaseFS, CommandLine, WorkingDirectory, *TUBuffer); + return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer, + Controller, DC, FinalFS, + /*ModuleName=*/std::nullopt); + } else { + BaseFS->setCurrentWorkingDirectory(WorkingDirectory); + return scanDependencies(WorkingDirectory, CommandLine, Consumer, Controller, + DC, BaseFS, /*ModuleName=*/std::nullopt); } - - const std::vector<std::string> &FinalCommandLine = - ModifiedCommandLine ? *ModifiedCommandLine : CommandLine; - auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS; - - return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer, - Controller, DC, FinalFS, /*ModuleName=*/std::nullopt); } bool DependencyScanningWorker::computeDependencies( StringRef WorkingDirectory, const std::vector<std::string> &CommandLine, DependencyConsumer &Consumer, DependencyActionController &Controller, DiagnosticConsumer &DC, StringRef ModuleName) { - // Reset what might have been modified in the previous worker invocation. - BaseFS->setCurrentWorkingDirectory(WorkingDirectory); - - // If we're scanning based on a module name alone, we don't expect the client - // to provide us with an input file. However, the driver really wants to have - // one. Let's just make it up to make the driver happy. - auto OverlayFS = - llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); - auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); - InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); - SmallString<128> FakeInputPath; - // TODO: We should retry the creation if the path already exists. - llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath, - /*MakeAbsolute=*/false); - InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer("")); - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; - - OverlayFS->pushOverlay(InMemoryOverlay); - auto ModifiedCommandLine = CommandLine; - ModifiedCommandLine.emplace_back(FakeInputPath); + auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning( + BaseFS, CommandLine, WorkingDirectory, ModuleName); return scanDependencies(WorkingDirectory, ModifiedCommandLine, Consumer, Controller, DC, OverlayFS, ModuleName); diff --git a/clang/test/CIR/CodeGen/address-space.c b/clang/test/CIR/CodeGen/address-space.c new file mode 100644 index 0000000..a334b8a --- /dev/null +++ b/clang/test/CIR/CodeGen/address-space.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +// Test address space 1 +// CIR: cir.func dso_local @foo(%arg0: !cir.ptr<!s32i, target_address_space(1)> +// LLVM: define dso_local void @foo(ptr addrspace(1) %0) +// OGCG: define dso_local void @foo(ptr addrspace(1) noundef %arg) +void foo(int __attribute__((address_space(1))) *arg) { + return; +} + +// Test explicit address space 0 (should be same as default) +// CIR: cir.func dso_local @bar(%arg0: !cir.ptr<!s32i, target_address_space(0)> +// LLVM: define dso_local void @bar(ptr %0) +// OGCG: define dso_local void @bar(ptr noundef %arg) +void bar(int __attribute__((address_space(0))) *arg) { + return; +} + +// Test default address space (no attribute) +// CIR: cir.func dso_local @baz(%arg0: !cir.ptr<!s32i> +// LLVM: define dso_local void @baz(ptr %0) +// OGCG: define dso_local void @baz(ptr noundef %arg) +void baz(int *arg) { + return; +} diff --git a/clang/test/CIR/IR/invalid-addrspace.cir b/clang/test/CIR/IR/invalid-addrspace.cir new file mode 100644 index 0000000..8f188b8 --- /dev/null +++ b/clang/test/CIR/IR/invalid-addrspace.cir @@ -0,0 +1,27 @@ +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +// ----- + +!u64i = !cir.int<u, 64> +// expected-error @below {{expected 'target_address_space'}} +cir.func @address_space1(%p : !cir.ptr<!u64i, foobar>) { + cir.return +} + +// ----- + +!u64i = !cir.int<u, 64> +// expected-error@below {{expected '('}} +cir.func @address_space2(%p : !cir.ptr<!u64i, target_address_space>) { + cir.return +} + +// ----- + +!u64i = !cir.int<u, 64> +// expected-error@+2 {{expected integer value}} +// expected-error@below {{expected integer address space value}} +cir.func @address_space3(%p : !cir.ptr<!u64i, target_address_space()>) { + cir.return +} + diff --git a/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl deleted file mode 100644 index 094006f..0000000 --- a/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -struct MyStruct { - float4 a; - int2 b; -}; - -// DXIL: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.12" = type { target("dx.RawBuffer", %struct.MyStruct, 1, 0) -// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }> -// DXIL: %"class.hlsl::AppendStructuredBuffer.13" = type { target("dx.RawBuffer", i32, 1, 0) -// SPV: %"class.hlsl::AppendStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1) -// DXIL: %"class.hlsl::AppendStructuredBuffer.14" = type { target("dx.RawBuffer", <4 x i32>, 1, 0) -// SPV: %"class.hlsl::AppendStructuredBuffer.14" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) - -AppendStructuredBuffer<int16_t> BufI16; -AppendStructuredBuffer<uint16_t> BufU16; -AppendStructuredBuffer<int> BufI32; -AppendStructuredBuffer<uint> BufU32; -AppendStructuredBuffer<int64_t> BufI64; -AppendStructuredBuffer<uint64_t> BufU64; -AppendStructuredBuffer<half> BufF16; -AppendStructuredBuffer<float> BufF32; -AppendStructuredBuffer<double> BufF64; -AppendStructuredBuffer< vector<int16_t, 4> > BufI16x4; -AppendStructuredBuffer< vector<uint, 3> > BufU32x3; -AppendStructuredBuffer<half2> BufF16x2; -AppendStructuredBuffer<float3> BufF32x3; -// TODO: AppendStructuredBuffer<snorm half> BufSNormF16; -// TODO: AppendStructuredBuffer<unorm half> BufUNormF16; -// TODO: AppendStructuredBuffer<snorm float> BufSNormF32; -// TODO: AppendStructuredBuffer<unorm float> BufUNormF32; -// TODO: AppendStructuredBuffer<snorm double> BufSNormF64; -// TODO: AppendStructuredBuffer<unorm double> BufUNormF64; -AppendStructuredBuffer<MyStruct> BufMyStruct; -AppendStructuredBuffer<bool> BufBool; -AppendStructuredBuffer<bool4> BufBoolVec; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { -} diff --git a/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl deleted file mode 100644 index 632fd91..0000000 --- a/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -struct MyStruct { - float4 a; - int2 b; -}; - -// DXIL: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.12" = type { target("dx.RawBuffer", %struct.MyStruct, 1, 0) -// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }> -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.13" = type { target("dx.RawBuffer", i32, 1, 0) -// SPV: %"class.hlsl::ConsumeStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.14" = type { target("dx.RawBuffer", <4 x i32>, 1, 0) -// SPV: %"class.hlsl::ConsumeStructuredBuffer.14" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) - -ConsumeStructuredBuffer<int16_t> BufI16; -ConsumeStructuredBuffer<uint16_t> BufU16; -ConsumeStructuredBuffer<int> BufI32; -ConsumeStructuredBuffer<uint> BufU32; -ConsumeStructuredBuffer<int64_t> BufI64; -ConsumeStructuredBuffer<uint64_t> BufU64; -ConsumeStructuredBuffer<half> BufF16; -ConsumeStructuredBuffer<float> BufF32; -ConsumeStructuredBuffer<double> BufF64; -ConsumeStructuredBuffer< vector<int16_t, 4> > BufI16x4; -ConsumeStructuredBuffer< vector<uint, 3> > BufU32x3; -ConsumeStructuredBuffer<half2> BufF16x2; -ConsumeStructuredBuffer<float3> BufF32x3; -// TODO: ConsumeStructuredBuffer<snorm half> BufSNormF16; -// TODO: ConsumeStructuredBuffer<unorm half> BufUNormF16; -// TODO: ConsumeStructuredBuffer<snorm float> BufSNormF32; -// TODO: ConsumeStructuredBuffer<unorm float> BufUNormF32; -// TODO: ConsumeStructuredBuffer<snorm double> BufSNormF64; -// TODO: ConsumeStructuredBuffer<unorm double> BufUNormF64; -ConsumeStructuredBuffer<MyStruct> BufMyStruct; -ConsumeStructuredBuffer<bool> BufBool; -ConsumeStructuredBuffer<bool4> BufBoolVec; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { -} diff --git a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl deleted file mode 100644 index 9f0a5b7..0000000 --- a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } - -RWStructuredBuffer<int16_t> BufI16; -RWStructuredBuffer<uint16_t> BufU16; -RWStructuredBuffer<int> BufI32; -RWStructuredBuffer<uint> BufU32; -RWStructuredBuffer<int64_t> BufI64; -RWStructuredBuffer<uint64_t> BufU64; -RWStructuredBuffer<half> BufF16; -RWStructuredBuffer<float> BufF32; -RWStructuredBuffer<double> BufF64; -RWStructuredBuffer< vector<int16_t, 4> > BufI16x4; -RWStructuredBuffer< vector<uint, 3> > BufU32x3; -RWStructuredBuffer<half2> BufF16x2; -RWStructuredBuffer<float3> BufF32x3; -RWStructuredBuffer<bool> BufBool; -RWStructuredBuffer<bool4> BufBoolVec; -// TODO: RWStructuredBuffer<snorm half> BufSNormF16; -// TODO: RWStructuredBuffer<unorm half> BufUNormF16; -// TODO: RWStructuredBuffer<snorm float> BufSNormF32; -// TODO: RWStructuredBuffer<unorm float> BufUNormF32; -// TODO: RWStructuredBuffer<snorm double> BufSNormF64; -// TODO: RWStructuredBuffer<unorm double> BufUNormF64; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { - BufI16[GI] = 0; - BufU16[GI] = 0; - BufI32[GI] = 0; - BufU32[GI] = 0; - BufI64[GI] = 0; - BufU64[GI] = 0; - BufF16[GI] = 0; - BufF32[GI] = 0; - BufF64[GI] = 0; - BufI16x4[GI] = 0; - BufU32x3[GI] = 0; - BufF16x2[GI] = 0; - BufF32x3[GI] = 0; - BufBool[GI] = false; - BufBool[GI] = false; -} diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl deleted file mode 100644 index 00216df..0000000 --- a/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,61 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -// CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", i16, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.0" = type { target("dx.RawBuffer", i16, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.1" = type { target("dx.RawBuffer", i32, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.2" = type { target("dx.RawBuffer", i32, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.3" = type { target("dx.RawBuffer", i64, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.4" = type { target("dx.RawBuffer", i64, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.5" = type { target("dx.RawBuffer", half, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.6" = type { target("dx.RawBuffer", float, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.7" = type { target("dx.RawBuffer", double, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.12" = type { target("dx.RawBuffer", i32, 0, 0) } -// SPV: %"class.hlsl::StructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) -// CHECK: %"class.hlsl::StructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 0, 0) } -// SPV: %"class.hlsl::StructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 0) - -StructuredBuffer<int16_t> BufI16; -StructuredBuffer<uint16_t> BufU16; -StructuredBuffer<int> BufI32; -StructuredBuffer<uint> BufU32; -StructuredBuffer<int64_t> BufI64; -StructuredBuffer<uint64_t> BufU64; -StructuredBuffer<half> BufF16; -StructuredBuffer<float> BufF32; -StructuredBuffer<double> BufF64; -StructuredBuffer< vector<int16_t, 4> > BufI16x4; -StructuredBuffer< vector<uint, 3> > BufU32x3; -StructuredBuffer<half2> BufF16x2; -StructuredBuffer<float3> BufF32x3; -StructuredBuffer<bool> BufBool; -StructuredBuffer<bool4> BufBoolVec; -// TODO: StructuredBuffer<snorm half> BufSNormF16; -// TODO: StructuredBuffer<unorm half> BufUNormF16; -// TODO: StructuredBuffer<snorm float> BufSNormF32; -// TODO: StructuredBuffer<unorm float> BufUNormF32; -// TODO: StructuredBuffer<snorm double> BufSNormF64; -// TODO: StructuredBuffer<unorm double> BufUNormF64; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { - int16_t v1 = BufI16[GI]; - uint16_t v2 = BufU16[GI]; - int v3 = BufI32[GI]; - uint v4 = BufU32[GI]; - int64_t v5 = BufI64[GI]; - uint64_t v6 = BufU64[GI]; - half v7 = BufF16[GI]; - float v8 = BufF32[GI]; - double v9 = BufF64[GI]; - vector<int16_t,4> v10 = BufI16x4[GI]; - vector<int, 3> v11 = BufU32x3[GI]; - half2 v12 = BufF16x2[GI]; - float3 v13 = BufF32x3[GI]; - bool v14 = BufBool[GI]; - bool4 v15 = BufBoolVec[GI]; -} diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl new file mode 100644 index 0000000..2b286bd --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=StructuredBuffer %s | FileCheck %s -DRESOURCE=StructuredBuffer -check-prefixes=DXIL-RO + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=StructuredBuffer %s | FileCheck %s -DRESOURCE=StructuredBuffer -check-prefixes=SPV-RO + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWStructuredBuffer %s | FileCheck %s -DRESOURCE=RWStructuredBuffer -check-prefixes=DXIL-RW + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWStructuredBuffer %s | FileCheck %s -DRESOURCE=RWStructuredBuffer -check-prefixes=SPV-RW + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=AppendStructuredBuffer %s | FileCheck %s -DRESOURCE=AppendStructuredBuffer -check-prefixes=DXIL-RW + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=AppendStructuredBuffer %s | FileCheck %s -DRESOURCE=AppendStructuredBuffer -check-prefixes=SPV-RW + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck %s -DRESOURCE=ConsumeStructuredBuffer -check-prefixes=DXIL-RW + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck %s -DRESOURCE=ConsumeStructuredBuffer -check-prefixes=SPV-RW + +// DXIL-RO: %"class.hlsl::[[RESOURCE]]" = type { target("dx.RawBuffer", i16, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.RawBuffer", i16, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.RawBuffer", i32, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.RawBuffer", i32, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.RawBuffer", i64, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.RawBuffer", i64, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.RawBuffer", half, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.RawBuffer", float, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.RawBuffer", double, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.RawBuffer", <4 x i16>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.RawBuffer", <3 x i32>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.RawBuffer", <2 x half>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.RawBuffer", <3 x float>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.RawBuffer", i32, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].13" = type { target("dx.RawBuffer", <4 x i32>, 0, 0) } + +// DXIL-RW: %"class.hlsl::[[RESOURCE]]" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) } + +// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 0) } + +// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } + +RESOURCE<int16_t> BufI16; +RESOURCE<uint16_t> BufU16; +RESOURCE<int> BufI32; +RESOURCE<uint> BufU32; +RESOURCE<int64_t> BufI64; +RESOURCE<uint64_t> BufU64; +RESOURCE<half> BufF16; +RESOURCE<float> BufF32; +RESOURCE<double> BufF64; +RESOURCE< vector<int16_t, 4> > BufI16x4; +RESOURCE< vector<uint, 3> > BufU32x3; +RESOURCE<half2> BufF16x2; +RESOURCE<float3> BufF32x3; +RESOURCE<bool> BufBool; +RESOURCE<bool4> BufBoolVec; +// TODO: RESOURCE<snorm half> BufSNormF16; +// TODO: RESOURCE<unorm half> BufUNormF16; +// TODO: RESOURCE<snorm float> BufSNormF32; +// TODO: RESOURCE<unorm float> BufUNormF32; +// TODO: RESOURCE<snorm double> BufSNormF64; +// TODO: RESOURCE<unorm double> BufUNormF64; + +[numthreads(1,1,1)] +void main() { +} diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index 6206020..09f1ffa 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -254,3 +254,9 @@ // RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=0x4 %s 2>&1 | \ // RUN: FileCheck -check-prefix=INVJOBS %s // INVJOBS: clang: error: invalid integral value '0x4' in '--offload-jobs=0x4' + +// RUN: %clang -### -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=jobserver %s 2>&1 | \ +// RUN: FileCheck -check-prefix=JOBSV %s +// JOBSV: clang-linker-wrapper{{.*}} "--wrapper-jobs=jobserver" + diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index c060dae..1c0fb96 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -114,6 +114,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR +// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=jobserver \ +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR // CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 1419b8c..4d5b956 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1295,12 +1295,18 @@ int main(int Argc, char **Argv) { parallel::strategy = hardware_concurrency(1); if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) { - unsigned Threads = 0; - if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0) - reportError(createStringError("%s: expected a positive integer, got '%s'", - Arg->getSpelling().data(), - Arg->getValue())); - parallel::strategy = hardware_concurrency(Threads); + StringRef Val = Arg->getValue(); + if (Val.equals_insensitive("jobserver")) + parallel::strategy = jobserver_concurrency(); + else { + unsigned Threads = 0; + if (!llvm::to_integer(Val, Threads) || Threads == 0) + reportError(createStringError( + "%s: expected a positive integer or 'jobserver', got '%s'", + Arg->getSpelling().data(), Val.data())); + else + parallel::strategy = hardware_concurrency(Threads); + } } if (Args.hasArg(OPT_wrapper_time_trace_eq)) { diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index fa73e02..87f911c 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -53,7 +53,8 @@ def wrapper_time_trace_granularity : Joined<["--"], "wrapper-time-trace-granular def wrapper_jobs : Joined<["--"], "wrapper-jobs=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">, - HelpText<"Sets the number of parallel jobs to use for device linking">; + HelpText<"Sets the number of parallel jobs for device linking. Can be a " + "positive integer or 'jobserver'.">; def override_image : Joined<["--"], "override-image=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">, diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 29cccbd..37b926e 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,11 +14,12 @@ #include "ClauseFinder.h" #include "flang/Evaluate/fold.h" -#include "flang/Lower/OpenMP/Clauses.h" #include <flang/Lower/AbstractConverter.h> #include <flang/Lower/ConvertType.h> #include <flang/Lower/DirectivesCommon.h> +#include <flang/Lower/OpenMP/Clauses.h> #include <flang/Lower/PFTBuilder.h> +#include <flang/Lower/Support/PrivateReductionUtils.h> #include <flang/Optimizer/Builder/FIRBuilder.h> #include <flang/Optimizer/Builder/Todo.h> #include <flang/Parser/openmp-utils.h> @@ -180,16 +181,11 @@ static void generateArrayIndices(lower::AbstractConverter &converter, for (auto v : arr->subscript()) { if (std::holds_alternative<Triplet>(v.u)) TODO(clauseLocation, "Triplet indexing in map clause is unsupported"); - auto expr = std::get<Fortran::evaluate::IndirectSubscriptIntegerExpr>(v.u); mlir::Value subscript = fir::getBase(converter.genExprValue(toEvExpr(expr.value()), stmtCtx)); - mlir::Value one = firOpBuilder.createIntegerConstant( - clauseLocation, firOpBuilder.getIndexType(), 1); - subscript = firOpBuilder.createConvert( - clauseLocation, firOpBuilder.getIndexType(), subscript); - indices.push_back(mlir::arith::SubIOp::create(firOpBuilder, clauseLocation, - subscript, one)); + indices.push_back(firOpBuilder.createConvert( + clauseLocation, firOpBuilder.getIndexType(), subscript)); } } @@ -322,10 +318,42 @@ mlir::Value createParentSymAndGenIntermediateMaps( subscriptIndices, objectList[i]); assert(!subscriptIndices.empty() && "missing expected indices for map clause"); - curValue = fir::CoordinateOp::create( - firOpBuilder, clauseLocation, - firOpBuilder.getRefType(arrType.getEleTy()), curValue, - subscriptIndices); + if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(curValue.getType())) { + // To accommodate indexing into box types of all dimensions including + // negative dimensions we have to take into consideration the lower + // bounds and extents of the data (stored in the box) and convey it + // to the ArrayCoorOp so that it can appropriately access the element + // utilising the subscript we provide and the runtime sizes stored in + // the Box. To do so we need to generate a ShapeShiftOp which combines + // both the lb (ShiftOp) and extent (ShapeOp) of the Box, giving the + // ArrayCoorOp the spatial information it needs to calculate the + // underlying address. + mlir::Value shapeShift = Fortran::lower::getShapeShift( + firOpBuilder, clauseLocation, curValue); + auto addrOp = + fir::BoxAddrOp::create(firOpBuilder, clauseLocation, curValue); + curValue = fir::ArrayCoorOp::create( + firOpBuilder, clauseLocation, + firOpBuilder.getRefType(arrType.getEleTy()), addrOp, shapeShift, + /*slice=*/mlir::Value{}, subscriptIndices, + /*typeparms=*/mlir::ValueRange{}); + } else { + // We're required to negate by one in the non-Box case as I believe + // we do not have the shape generated from the dimensions to help + // adjust the indexing. + // TODO/FIXME: This may need adjusted to support bounds of unusual + // dimensions, if that's the case then it is likely best to fold this + // branch into the above. + mlir::Value one = firOpBuilder.createIntegerConstant( + clauseLocation, firOpBuilder.getIndexType(), 1); + for (auto &v : subscriptIndices) + v = mlir::arith::SubIOp::create(firOpBuilder, clauseLocation, v, + one); + curValue = fir::CoordinateOp::create( + firOpBuilder, clauseLocation, + firOpBuilder.getRefType(arrType.getEleTy()), curValue, + subscriptIndices); + } } } @@ -415,7 +443,6 @@ mlir::Value createParentSymAndGenIntermediateMaps( currentIndicesIdx++; } } - return curValue; } diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 index 665be5a..44a049f 100644 --- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 +++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 @@ -785,15 +785,20 @@ end subroutine mapType_common_block_members !CHECK: %[[BOUNDS_CALC:.*]] = sub i64 %[[BOUNDS_LD_2]], 1 !CHECK: %[[OFF_PTR_CALC_0:.*]] = sub i64 %[[BOUNDS_LD]], 1 !CHECK: %[[OFF_PTR_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[OFF_PTR_1]], i32 0, i32 0 +!CHECK: %[[GEP_LB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 7, i64 0, i32 0 +!CHECK: %[[LOAD_LB:.*]] = load i64, ptr %[[GEP_LB]], align 8 +!CHECK: %[[GEP_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[LOAD_UB:.*]] = load i64, ptr %[[GEP_UB]], align 8 !CHECK: %[[GEP_DESC_PTR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 0 -!CHECK: %[[LOAD_DESC_PTR:.*]] = load ptr, ptr %[[GEP_DESC_PTR]], align 8 -!CHECK: %[[SZ_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 7, i32 0, i32 2 -!CHECK: %[[SZ_CALC_2:.*]] = load i64, ptr %[[SZ_CALC_1]], align 8 -!CHECK: %[[SZ_CALC_3:.*]] = mul nsw i64 1, %[[SZ_CALC_2]] -!CHECK: %[[SZ_CALC_4:.*]] = add nsw i64 %[[SZ_CALC_3]], 0 -!CHECK: %[[SZ_CALC_5:.*]] = getelementptr i8, ptr %[[LOAD_DESC_PTR]], i64 %[[SZ_CALC_4]] -!CHECK: %[[SZ_CALC_6:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[SZ_CALC_5]], i32 0, i32 2 -!CHECK: %[[OFF_PTR_4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[SZ_CALC_6]], i32 0, i32 0 +!CHECK: %[[SZ_CALC_1:.*]] = load ptr, ptr %[[GEP_DESC_PTR]], align 8 +!CHECK: %[[SZ_CALC_2:.*]] = sub nsw i64 2, %[[LOAD_LB]] +!CHECK: %[[SZ_CALC_3:.*]] = mul nsw i64 %[[SZ_CALC_2]], 1 +!CHECK: %[[SZ_CALC_4:.*]] = mul nsw i64 %[[SZ_CALC_3]], 1 +!CHECK: %[[SZ_CALC_5:.*]] = add nsw i64 %[[SZ_CALC_4]], 0 +!CHECK: %[[SZ_CALC_6:.*]] = mul nsw i64 1, %[[LOAD_UB]] +!CHECK: %[[SZ_CALC_7:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[SZ_CALC_1]], i64 %[[SZ_CALC_5]] +!CHECK: %[[SZ_CALC_8:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[SZ_CALC_7]], i32 0, i32 2 +!CHECK: %[[OFF_PTR_4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[SZ_CALC_8]], i32 0, i32 0 !CHECK: %[[OFF_PTR_CALC_1:.*]] = sub i64 %[[OFF_PTR_CALC_0]], 0 !CHECK: %[[OFF_PTR_CALC_2:.*]] = add i64 %[[OFF_PTR_CALC_1]], 1 !CHECK: %[[OFF_PTR_CALC_3:.*]] = mul i64 1, %[[OFF_PTR_CALC_2]] @@ -838,7 +843,7 @@ end subroutine mapType_common_block_members !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 4 !CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 4 -!CHECK: store ptr %[[SZ_CALC_6]], ptr %[[OFFLOAD_PTR_ARR]], align 8 +!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 5 !CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 5 diff --git a/flang/test/Lower/OpenMP/map-neg-alloca-derived-type-array.f90 b/flang/test/Lower/OpenMP/map-neg-alloca-derived-type-array.f90 new file mode 100644 index 0000000..dd8721b --- /dev/null +++ b/flang/test/Lower/OpenMP/map-neg-alloca-derived-type-array.f90 @@ -0,0 +1,27 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +subroutine map_negative_bounds_allocatable_dtype() + type derived_type + real(4), pointer :: data(:,:,:) => null() + end type + type(derived_type), allocatable :: dtype(:,:) + + !$omp target map(tofrom: dtype(-1,1)%data) + dtype(-1,1)%data(1,1,1) = 10 + !$omp end target +end subroutine + +! CHECK: %[[VAL_1:.*]] = arith.constant -1 : i64 +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i64) -> index +! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %{{.*}}, %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x!fir.type<_QFmap_negative_bounds_allocatable_dtypeTderived_type{data:!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>}>>>>, index) -> (index, index, index) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %{{.*}}, %[[VAL_7]] : (!fir.box<!fir.heap<!fir.array<?x?x!fir.type<_QFmap_negative_bounds_allocatable_dtypeTderived_type{data:!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>}>>>>, index) -> (index, index, index) +! CHECK: %[[VAL_9:.*]] = fir.shape_shift %[[VAL_6]]#0, %[[VAL_6]]#1, %[[VAL_8]]#0, %[[VAL_8]]#1 : (index, index, index, index) -> !fir.shapeshift<2> +! CHECK: %[[VAL_10:.*]] = fir.array_coor %{{.*}}(%[[VAL_9]]) %[[VAL_2]], %[[VAL_4]] : (!fir.heap<!fir.array<?x?x!fir.type<_QFmap_negative_bounds_allocatable_dtypeTderived_type{data:!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>}>>>, !fir.shapeshift<2>, index, index) -> !fir.ref<!fir.type<_QFmap_negative_bounds_allocatable_dtypeTderived_type{data:!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>}>> +! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_10]], data : (!fir.ref<!fir.type<_QFmap_negative_bounds_allocatable_dtypeTderived_type{data:!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>> +! CHECK: %[[VAL_12:.*]] = fir.box_offset %[[VAL_11]] base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xf32>>> +! CHECK: %[[VAL_13:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>>, f32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_12]] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xf32>>>) bounds({{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x?xf32>>> {name = ""} +! CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_11]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>>, !fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xf32>>>> {name = {{.*}}} diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index 6f20a02..9958b6e 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -404,8 +404,15 @@ public: GetLanguageTypeFromString(const char *string) = delete; static lldb::LanguageType GetLanguageTypeFromString(llvm::StringRef string); + /// Returns the internal LLDB name for the specified language. When presenting + /// the language name to users, use \ref GetDisplayNameForLanguageType + /// instead. static const char *GetNameForLanguageType(lldb::LanguageType language); + /// Returns a user-friendly name for the specified language. + static llvm::StringRef + GetDisplayNameForLanguageType(lldb::LanguageType language); + static void PrintAllLanguages(Stream &s, const char *prefix, const char *suffix); diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 2966ac0..e30d549 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -280,9 +280,6 @@ def parseOptionsAndInitTestdirs(): configuration.llvm_tools_dir = args.llvm_tools_dir configuration.filecheck = shutil.which("FileCheck", path=args.llvm_tools_dir) configuration.yaml2obj = shutil.which("yaml2obj", path=args.llvm_tools_dir) - configuration.yaml2macho_core = shutil.which( - "yaml2macho-core", path=args.llvm_tools_dir - ) if not configuration.get_filecheck_path(): logging.warning("No valid FileCheck executable; some tests may fail...") @@ -563,6 +560,8 @@ def setupSysPath(): if is_exe(lldbDAPExec): os.environ["LLDBDAP_EXEC"] = lldbDAPExec + configuration.yaml2macho_core = shutil.which("yaml2macho-core", path=lldbDir) + lldbPythonDir = None # The directory that contains 'lldb/__init__.py' # If our lldb supports the -P option, use it to find the python path: diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp index d4a9268..395718e 100644 --- a/lldb/source/Target/Language.cpp +++ b/lldb/source/Target/Language.cpp @@ -271,6 +271,10 @@ const char *Language::GetNameForLanguageType(LanguageType language) { return language_names[eLanguageTypeUnknown].name; } +llvm::StringRef Language::GetDisplayNameForLanguageType(LanguageType language) { + return SourceLanguage(language).GetDescription(); +} + void Language::PrintSupportedLanguagesForExpressions(Stream &s, llvm::StringRef prefix, llvm::StringRef suffix) { @@ -543,9 +547,26 @@ Language::Language() = default; // Destructor Language::~Language() = default; +static std::optional<llvm::dwarf::SourceLanguage> +ToDwarfSourceLanguage(lldb::LanguageType language_type) { + if (language_type < lldb::eLanguageTypeLastStandardLanguage) + return static_cast<llvm::dwarf::SourceLanguage>(language_type); + + switch (language_type) { + case eLanguageTypeMipsAssembler: + return llvm::dwarf::DW_LANG_Mips_Assembler; + default: + return std::nullopt; + } +} + SourceLanguage::SourceLanguage(lldb::LanguageType language_type) { - auto lname = - llvm::dwarf::toDW_LNAME((llvm::dwarf::SourceLanguage)language_type); + std::optional<llvm::dwarf::SourceLanguage> dwarf_lang = + ToDwarfSourceLanguage(language_type); + if (!dwarf_lang) + return; + + auto lname = llvm::dwarf::toDW_LNAME(*dwarf_lang); if (!lname) return; name = lname->first; @@ -560,11 +581,8 @@ lldb::LanguageType SourceLanguage::AsLanguageType() const { } llvm::StringRef SourceLanguage::GetDescription() const { - LanguageType type = AsLanguageType(); - if (type) - return Language::GetNameForLanguageType(type); return llvm::dwarf::LanguageDescription( - (llvm::dwarf::SourceLanguageName)name); + static_cast<llvm::dwarf::SourceLanguageName>(name)); } bool SourceLanguage::IsC() const { return name == llvm::dwarf::DW_LNAME_C; } diff --git a/lldb/unittests/Host/posix/HostTest.cpp b/lldb/unittests/Host/posix/HostTest.cpp index dc75b28..7135f26 100644 --- a/lldb/unittests/Host/posix/HostTest.cpp +++ b/lldb/unittests/Host/posix/HostTest.cpp @@ -15,10 +15,6 @@ #include <cerrno> #include <sys/resource.h> -#ifdef __linux__ -#include <linux/version.h> -#endif // __linux__ - using namespace lldb_private; namespace { @@ -120,12 +116,13 @@ TEST_F(HostTest, GetProcessInfoSetsPriority) { ASSERT_TRUE(Info.IsZombie().has_value()); ASSERT_FALSE(Info.IsZombie().value()); - // CoreDumping was added in kernel version 4.15. -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) - ASSERT_TRUE(Info.IsCoreDumping().has_value()); - ASSERT_FALSE(Info.IsCoreDumping().value()); -#else - ASSERT_FALSE(Info.IsCoreDumping().has_value()); -#endif + const llvm::VersionTuple host_version = HostInfo::GetOSVersion(); + ASSERT_FALSE(host_version.empty()); + if (host_version >= llvm::VersionTuple(4, 15, 0)) { + ASSERT_TRUE(Info.IsCoreDumping().has_value()); + ASSERT_FALSE(Info.IsCoreDumping().value()); + } else { + ASSERT_FALSE(Info.IsCoreDumping().has_value()); + } } #endif diff --git a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp index 9628cbd..45464db 100644 --- a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp +++ b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp @@ -38,6 +38,9 @@ using namespace lldb_private; using namespace lldb_private::transport; using namespace lldb_protocol::mcp; +// Flakey, see https://github.com/llvm/llvm-project/issues/152677. +#ifndef _WIN32 + namespace { template <typename T> Response make_response(T &&result, Id id = 1) { @@ -325,3 +328,5 @@ TEST_F(ProtocolServerMCPTest, NotificationInitialized) { EXPECT_THAT(logged_messages, testing::Contains("MCP initialization complete")); } + +#endif diff --git a/lldb/unittests/Protocol/ProtocolMCPTest.cpp b/lldb/unittests/Protocol/ProtocolMCPTest.cpp index 396e361..5f7391e 100644 --- a/lldb/unittests/Protocol/ProtocolMCPTest.cpp +++ b/lldb/unittests/Protocol/ProtocolMCPTest.cpp @@ -16,6 +16,9 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_protocol::mcp; +// Flakey, see https://github.com/llvm/llvm-project/issues/152677. +#ifndef _WIN32 + TEST(ProtocolMCPTest, Request) { Request request; request.id = 1; @@ -292,3 +295,5 @@ TEST(ProtocolMCPTest, ReadResourceResultEmpty) { EXPECT_TRUE(deserialized_result->contents.empty()); } + +#endif diff --git a/lldb/unittests/Target/CMakeLists.txt b/lldb/unittests/Target/CMakeLists.txt index 3169339..0c79675 100644 --- a/lldb/unittests/Target/CMakeLists.txt +++ b/lldb/unittests/Target/CMakeLists.txt @@ -2,6 +2,7 @@ add_lldb_unittest(TargetTests ABITest.cpp DynamicRegisterInfoTest.cpp ExecutionContextTest.cpp + Language.cpp LocateModuleCallbackTest.cpp MemoryRegionInfoTest.cpp MemoryTest.cpp diff --git a/lldb/unittests/Target/Language.cpp b/lldb/unittests/Target/Language.cpp new file mode 100644 index 0000000..a00fda78 --- /dev/null +++ b/lldb/unittests/Target/Language.cpp @@ -0,0 +1,69 @@ +//===-- LanguageTest.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/Language.h" +#include "lldb/lldb-enumerations.h" +#include "gtest/gtest.h" + +using namespace lldb_private; +using namespace lldb; + +namespace { +class LanguageTest : public ::testing::Test {}; +} // namespace + +TEST_F(LanguageTest, SourceLanguage_GetDescription) { + for (uint32_t i = 1; i < lldb::eNumLanguageTypes; ++i) { + // 0x29 is unassigned + if (i == 0x29) + continue; + + auto lang_type = static_cast<lldb::LanguageType>(i); + if (lang_type == lldb::eLanguageTypeLastStandardLanguage) + continue; + + SourceLanguage lang(lang_type); + + // eLanguageTypeHIP is not implemented as a DW_LNAME because of a conflict. + if (lang_type == lldb::eLanguageTypeHIP) + EXPECT_FALSE(lang); + else + EXPECT_TRUE(lang); + } + + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus).GetDescription(), + "ISO C++"); + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_17).GetDescription(), + "ISO C++"); + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_20).GetDescription(), + "ISO C++"); + + EXPECT_EQ(SourceLanguage(eLanguageTypeObjC).GetDescription(), "Objective C"); + EXPECT_EQ(SourceLanguage(eLanguageTypeMipsAssembler).GetDescription(), + "Assembly"); + + auto next_vendor_language = + static_cast<lldb::LanguageType>(eLanguageTypeMipsAssembler + 1); + if (next_vendor_language < eNumLanguageTypes) + EXPECT_NE(SourceLanguage(next_vendor_language).GetDescription(), "Unknown"); + + EXPECT_EQ(SourceLanguage(eLanguageTypeUnknown).GetDescription(), "Unknown"); +} + +TEST_F(LanguageTest, SourceLanguage_AsLanguageType) { + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus).AsLanguageType(), + eLanguageTypeC_plus_plus); + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_03).AsLanguageType(), + eLanguageTypeC_plus_plus_03); + + // Vendor-specific language code. + EXPECT_EQ(SourceLanguage(eLanguageTypeMipsAssembler).AsLanguageType(), + eLanguageTypeAssembly); + EXPECT_EQ(SourceLanguage(eLanguageTypeUnknown).AsLanguageType(), + eLanguageTypeUnknown); +} diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index d9d6f0b..62c0806 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1959,6 +1959,10 @@ public: LLVM_ABI SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + /// Get all the nodes in their topological order without modifying any states. + LLVM_ABI void getTopologicallyOrderedNodes( + SmallVectorImpl<const SDNode *> &SortedNodes) const; + /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their /// topological order. Returns the number of nodes. @@ -2009,7 +2013,9 @@ public: /// function mirrors \c llvm::salvageDebugInfo. LLVM_ABI void salvageDebugInfo(SDNode &N); - LLVM_ABI void dump() const; + /// Dump the textual format of this DAG. Print nodes in sorted orders if \p + /// Sorted is true. + LLVM_ABI void dump(bool Sorted = false) const; /// In most cases this function returns the ABI alignment for a given type, /// except for illegal vector types where the alignment exceeds that of the diff --git a/llvm/include/llvm/Support/Jobserver.h b/llvm/include/llvm/Support/Jobserver.h new file mode 100644 index 0000000..6bee3b5 --- /dev/null +++ b/llvm/include/llvm/Support/Jobserver.h @@ -0,0 +1,162 @@ +//===- llvm/Support/Jobserver.h - Jobserver Client --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a client for the GNU Make jobserver protocol. This allows +// LLVM tools to coordinate parallel execution with a parent `make` process. +// +// The jobserver protocol is a mechanism for GNU Make to share its pool of +// available "job slots" with the subprocesses it invokes. This is particularly +// useful for tools that can perform parallel operations themselves (e.g., a +// multi-threaded linker or compiler). By participating in this protocol, a +// tool can ensure the total number of concurrent jobs does not exceed the +// limit specified by the user (e.g., `make -j8`). +// +// How it works: +// +// 1. Establishment: +// A child process discovers the jobserver by inspecting the `MAKEFLAGS` +// environment variable. If a jobserver is active, this variable will +// contain a `--jobserver-auth=<value>` argument. The format of `<value>` +// determines how to communicate with the server. +// +// 2. The Implicit Slot: +// Every command invoked by `make` is granted one "implicit" job slot. This +// means a tool can always perform at least one unit of work without needing +// to communicate with the jobserver. This implicit slot should NEVER be +// released back to the jobserver. +// +// 3. Acquiring and Releasing Slots: +// On POSIX systems, the jobserver is implemented as a pipe. The +// `--jobserver-auth` value specifies either a path to a named pipe +// (`fifo:PATH`) or a pair of file descriptors (`R,W`). The pipe is +// pre-loaded with single-character tokens, one for each available job slot. +// +// - To acquire an additional slot, a client reads a single-character token +// from the pipe. +// - To release a slot, the client must write the *exact same* character +// token back to the pipe. +// +// It is critical that a client releases all acquired slots before it exits, +// even in cases of error, to avoid deadlocking the build. +// +// Example: +// A multi-threaded linker invoked by `make -j8` wants to use multiple +// threads. It first checks for the jobserver. It knows it has one implicit +// slot, so it can use one thread. It then tries to acquire 7 more slots by +// reading 7 tokens from the jobserver pipe. If it only receives 3 tokens, +// it knows it can use a total of 1 (implicit) + 3 (acquired) = 4 threads. +// Before exiting, it must write the 3 tokens it read back to the pipe. +// +// For more context, see: +// - GNU Make manual on job slots: +// https://www.gnu.org/software/make/manual/html_node/Job-Slots.html +// - LLVM RFC discussion on jobserver support: +// https://discourse.llvm.org/t/rfc-adding-gnu-make-jobserver- +// support-to-llvm-for-coordinated-parallelism/87034 +// - Ninja’s jobserver support PR: +// https://github.com/ninja-build/ninja/pull/2506 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JOBSERVER_H +#define LLVM_SUPPORT_JOBSERVER_H + +#include "llvm/ADT/StringRef.h" +#include <memory> +#include <string> + +namespace llvm { + +/// A JobSlot represents a single job slot that can be acquired from or released +/// to a jobserver pool. This class is move-only. +class JobSlot { +public: + /// Default constructor creates an invalid instance. + JobSlot() = default; + + // Move operations are allowed. + JobSlot(JobSlot &&Other) noexcept : Value(Other.Value) { + Other.Value = kInvalidValue; + } + JobSlot &operator=(JobSlot &&Other) noexcept { + if (this != &Other) { + this->Value = Other.Value; + Other.Value = kInvalidValue; + } + return *this; + } + + // Copy operations are disallowed. + JobSlot(const JobSlot &) = delete; + JobSlot &operator=(const JobSlot &) = delete; + + /// Returns true if this instance is valid (either implicit or explicit). + bool isValid() const { return Value >= 0; } + + /// Returns true if this instance represents the implicit job slot. + bool isImplicit() const { return Value == kImplicitValue; } + + static JobSlot createExplicit(uint8_t V) { + return JobSlot(static_cast<int16_t>(V)); + } + + static JobSlot createImplicit() { return JobSlot(kImplicitValue); } + + uint8_t getExplicitValue() const; + bool isExplicit() const { return isValid() && !isImplicit(); } + +private: + friend class JobserverClient; + friend class JobserverClientImpl; + + JobSlot(int16_t V) : Value(V) {} + + /// The jobserver pipe carries explicit tokens (bytes 0–255). We reserve two + /// sentinels in Value for special cases: + /// kInvalidValue (-1): no slot held + /// kImplicitValue (INT16_MAX): implicit slot granted at startup (no pipe + /// I/O) + /// + /// We use int16_t so Value can store 0–255 explicit tokens and + /// sentinels without overflow, enforces fixed 16-bit width, and avoids + /// unsigned/signed mix-ups. + static constexpr int16_t kInvalidValue = -1; + static constexpr int16_t kImplicitValue = INT16_MAX; + int16_t Value = kInvalidValue; +}; + +/// The public interface for a jobserver client. +/// This client is a lazy-initialized singleton that is created on first use. +class JobserverClient { +public: + virtual ~JobserverClient(); + + /// Tries to acquire a job slot from the pool. On failure (e.g., if the pool + /// is empty), this returns an invalid JobSlot instance. The first successful + /// call will always return the implicit slot. + virtual JobSlot tryAcquire() = 0; + + /// Releases a job slot back to the pool. + virtual void release(JobSlot Slot) = 0; + + /// Returns the number of job slots available, as determined on first use. + /// This value is cached. Returns 0 if no jobserver is active. + virtual unsigned getNumJobs() const = 0; + + /// Returns the singleton instance of the JobserverClient. + /// The instance is created on the first call to this function. + /// Returns a nullptr if no jobserver is configured or an error occurs. + static JobserverClient *getInstance(); + + /// Resets the singleton instance. For testing purposes only. + static void resetForTesting(); +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_JOBSERVER_H diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index c26681c..c20efc7 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Jobserver.h" #include "llvm/Support/RWMutex.h" #include "llvm/Support/Threading.h" #include "llvm/Support/thread.h" @@ -180,6 +181,7 @@ private: void grow(int requested); void processTasks(ThreadPoolTaskGroup *WaitingForGroup); + void processTasksWithJobserver(); /// Threads in flight std::vector<llvm::thread> Threads; @@ -208,6 +210,8 @@ private: /// Maximum number of threads to potentially grow this pool to. const unsigned MaxThreadCount; + + JobserverClient *TheJobserver = nullptr; }; #endif // LLVM_ENABLE_THREADS diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index d3fe0a5..8884680 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -142,6 +142,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; } /// the thread shall remain on the actual CPU socket. LLVM_ABI std::optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const; + + /// If true, the thread pool will attempt to coordinate with a GNU Make + /// jobserver, acquiring a job slot before processing a task. If no + /// jobserver is found in the environment, this is ignored. + bool UseJobserver = false; }; /// Build a strategy from a number of threads as a string provided in \p Num. @@ -210,6 +215,19 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; } return S; } + /// Returns a thread strategy that attempts to coordinate with a GNU Make + /// jobserver. The number of active threads will be limited by the number of + /// available job slots. If no jobserver is detected in the environment, this + /// strategy falls back to the default hardware_concurrency() behavior. + inline ThreadPoolStrategy jobserver_concurrency() { + ThreadPoolStrategy S; + S.UseJobserver = true; + // We can still request all threads be created, as they will simply + // block waiting for a job slot if the jobserver is the limiting factor. + S.ThreadsRequested = 0; // 0 means 'use all available' + return S; + } + /// Return the current thread id, as used in various OS system calls. /// Note that not all platforms guarantee that the value returned will be /// unique across the entire system, so portable code should not assume diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index faf7788..e3f995d 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -126,7 +126,7 @@ def G_FRAME_INDEX : GenericInstruction { } def G_GLOBAL_VALUE : GenericInstruction { - let OutOperandList = (outs type0:$dst); + let OutOperandList = (outs ptype0:$dst); let InOperandList = (ins unknown:$src); let hasSideEffects = false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 95f53fe..6ea2e27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12698,6 +12698,45 @@ unsigned SelectionDAG::AssignTopologicalOrder() { return DAGSize; } +void SelectionDAG::getTopologicallyOrderedNodes( + SmallVectorImpl<const SDNode *> &SortedNodes) const { + SortedNodes.clear(); + // Node -> remaining number of outstanding operands. + DenseMap<const SDNode *, unsigned> RemainingOperands; + + // Put nodes without any operands into SortedNodes first. + for (const SDNode &N : allnodes()) { + checkForCycles(&N, this); + unsigned NumOperands = N.getNumOperands(); + if (NumOperands == 0) + SortedNodes.push_back(&N); + else + // Record their total number of outstanding operands. + RemainingOperands[&N] = NumOperands; + } + + // A node is pushed into SortedNodes when all of its operands (predecessors in + // the graph) are also in SortedNodes. + for (unsigned i = 0U; i < SortedNodes.size(); ++i) { + const SDNode *N = SortedNodes[i]; + for (const SDNode *U : N->users()) { + unsigned &NumRemOperands = RemainingOperands[U]; + assert(NumRemOperands && "Invalid number of remaining operands"); + --NumRemOperands; + if (!NumRemOperands) + SortedNodes.push_back(U); + } + } + + assert(SortedNodes.size() == AllNodes.size() && "Node count mismatch"); + assert(SortedNodes.front()->getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token"); + assert(SortedNodes.front()->getNumOperands() == 0 && + "First node in topological sort has operands"); + assert(SortedNodes.back()->use_empty() && + "Last node in topologic sort has users"); +} + /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 4b2a00c..fcfbfe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1061,13 +1061,24 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { N->dump(G); } -LLVM_DUMP_METHOD void SelectionDAG::dump() const { +LLVM_DUMP_METHOD void SelectionDAG::dump(bool Sorted) const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; - for (const SDNode &N : allnodes()) { + auto dumpEachNode = [this](const SDNode &N) { if (!N.hasOneUse() && &N != getRoot().getNode() && (!shouldPrintInline(N, this) || N.use_empty())) DumpNodes(&N, 2, this); + }; + + if (Sorted) { + SmallVector<const SDNode *> SortedNodes; + SortedNodes.reserve(AllNodes.size()); + getTopologicallyOrderedNodes(SortedNodes); + for (const SDNode *N : SortedNodes) + dumpEachNode(*N); + } else { + for (const SDNode &N : allnodes()) + dumpEachNode(N); } if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e61558c..c35f29d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -144,6 +144,11 @@ UseMBPI("use-mbpi", cl::init(true), cl::Hidden); #ifndef NDEBUG +static cl::opt<bool> + DumpSortedDAG("dump-sorted-dags", cl::Hidden, + cl::desc("Print DAGs with sorted nodes in debug dump"), + cl::init(false)); + static cl::opt<std::string> FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, cl::desc("Only display the basic block whose name " @@ -932,7 +937,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nInitial selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -952,7 +957,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -974,7 +979,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -998,7 +1003,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1016,7 +1021,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1032,7 +1037,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1052,7 +1057,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1072,7 +1077,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nLegalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1092,7 +1097,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1116,7 +1121,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nSelected selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 7da972f..42b21b5 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport InstructionCost.cpp IntEqClasses.cpp IntervalMap.cpp + Jobserver.cpp JSON.cpp KnownBits.cpp KnownFPClass.cpp diff --git a/llvm/lib/Support/Jobserver.cpp b/llvm/lib/Support/Jobserver.cpp new file mode 100644 index 0000000..9f726eb --- /dev/null +++ b/llvm/lib/Support/Jobserver.cpp @@ -0,0 +1,259 @@ +//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Jobserver.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +#include <atomic> +#include <memory> +#include <mutex> +#include <new> + +#define DEBUG_TYPE "jobserver" + +using namespace llvm; + +namespace { +struct FdPair { + int Read = -1; + int Write = -1; + bool isValid() const { return Read >= 0 && Write >= 0; } +}; + +struct JobserverConfig { + enum Mode { + None, + PosixFifo, + PosixPipe, + Win32Semaphore, + }; + Mode TheMode = None; + std::string Path; + FdPair PipeFDs; +}; + +/// A helper function that checks if `Input` starts with `Prefix`. +/// If it does, it removes the prefix from `Input`, assigns the remainder to +/// `Value`, and returns true. Otherwise, it returns false. +bool getPrefixedValue(StringRef Input, StringRef Prefix, StringRef &Value) { + if (Input.consume_front(Prefix)) { + Value = Input; + return true; + } + return false; +} + +/// A helper function to parse a string in the format "R,W" where R and W are +/// non-negative integers representing file descriptors. It populates the +/// `ReadFD` and `WriteFD` output parameters. Returns true on success. +static std::optional<FdPair> getFileDescriptorPair(StringRef Input) { + FdPair FDs; + if (Input.consumeInteger(10, FDs.Read)) + return std::nullopt; + if (!Input.consume_front(",")) + return std::nullopt; + if (Input.consumeInteger(10, FDs.Write)) + return std::nullopt; + if (!Input.empty() || !FDs.isValid()) + return std::nullopt; + return FDs; +} + +/// Parses the `MAKEFLAGS` environment variable string to find jobserver +/// arguments. It splits the string into space-separated arguments and searches +/// for `--jobserver-auth` or `--jobserver-fds`. Based on the value of these +/// arguments, it determines the jobserver mode (Pipe, FIFO, or Semaphore) and +/// connection details (file descriptors or path). +Expected<JobserverConfig> parseNativeMakeFlags(StringRef MakeFlags) { + JobserverConfig Config; + if (MakeFlags.empty()) + return Config; + + // Split the MAKEFLAGS string into arguments. + SmallVector<StringRef, 8> Args; + SplitString(MakeFlags, Args); + + // If '-n' (dry-run) is present as a legacy flag (not starting with '-'), + // disable the jobserver. + if (!Args.empty() && !Args[0].starts_with("-") && Args[0].contains('n')) + return Config; + + // Iterate through arguments to find jobserver flags. + // Note that make may pass multiple --jobserver-auth flags; the last one wins. + for (StringRef Arg : Args) { + StringRef Value; + if (getPrefixedValue(Arg, "--jobserver-auth=", Value)) { + // Try to parse as a file descriptor pair first. + if (auto FDPair = getFileDescriptorPair(Value)) { + Config.TheMode = JobserverConfig::PosixPipe; + Config.PipeFDs = *FDPair; + } else { + StringRef FifoPath; + // If not FDs, try to parse as a named pipe (fifo). + if (getPrefixedValue(Value, "fifo:", FifoPath)) { + Config.TheMode = JobserverConfig::PosixFifo; + Config.Path = FifoPath.str(); + } else { + // Otherwise, assume it's a Windows semaphore. + Config.TheMode = JobserverConfig::Win32Semaphore; + Config.Path = Value.str(); + } + } + } else if (getPrefixedValue(Arg, "--jobserver-fds=", Value)) { + // This is an alternative, older syntax for the pipe-based server. + if (auto FDPair = getFileDescriptorPair(Value)) { + Config.TheMode = JobserverConfig::PosixPipe; + Config.PipeFDs = *FDPair; + } else { + return createStringError(inconvertibleErrorCode(), + "Invalid file descriptor pair in MAKEFLAGS"); + } + } + } + +// Perform platform-specific validation. +#ifdef _WIN32 + if (Config.TheMode == JobserverConfig::PosixFifo || + Config.TheMode == JobserverConfig::PosixPipe) + return createStringError( + inconvertibleErrorCode(), + "FIFO/Pipe-based jobserver is not supported on Windows"); +#else + if (Config.TheMode == JobserverConfig::Win32Semaphore) + return createStringError( + inconvertibleErrorCode(), + "Semaphore-based jobserver is not supported on this platform"); +#endif + return Config; +} + +std::once_flag GJobserverOnceFlag; +JobserverClient *GJobserver = nullptr; + +} // namespace + +namespace llvm { +class JobserverClientImpl : public JobserverClient { + bool IsInitialized = false; + std::atomic<bool> HasImplicitSlot{true}; + unsigned NumJobs = 0; + +public: + JobserverClientImpl(const JobserverConfig &Config); + ~JobserverClientImpl() override; + + JobSlot tryAcquire() override; + void release(JobSlot Slot) override; + unsigned getNumJobs() const override { return NumJobs; } + + bool isValid() const { return IsInitialized; } + +private: +#if defined(LLVM_ON_UNIX) + int ReadFD = -1; + int WriteFD = -1; + std::string FifoPath; +#elif defined(_WIN32) + void *Semaphore = nullptr; +#endif +}; +} // namespace llvm + +// Include the platform-specific parts of the class. +#if defined(LLVM_ON_UNIX) +#include "Unix/Jobserver.inc" +#elif defined(_WIN32) +#include "Windows/Jobserver.inc" +#else +// Dummy implementation for unsupported platforms. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {} +JobserverClientImpl::~JobserverClientImpl() = default; +JobSlot JobserverClientImpl::tryAcquire() { return JobSlot(); } +void JobserverClientImpl::release(JobSlot Slot) {} +#endif + +namespace llvm { +JobserverClient::~JobserverClient() = default; + +uint8_t JobSlot::getExplicitValue() const { + assert(isExplicit() && "Cannot get value of implicit or invalid slot"); + return static_cast<uint8_t>(Value); +} + +/// This is the main entry point for acquiring a jobserver client. It uses a +/// std::call_once to ensure the singleton `GJobserver` instance is created +/// safely in a multi-threaded environment. On first call, it reads the +/// `MAKEFLAGS` environment variable, parses it, and attempts to construct and +/// initialize a `JobserverClientImpl`. If successful, the global instance is +/// stored in `GJobserver`. Subsequent calls will return the existing instance. +JobserverClient *JobserverClient::getInstance() { + std::call_once(GJobserverOnceFlag, []() { + LLVM_DEBUG( + dbgs() + << "JobserverClient::getInstance() called for the first time.\n"); + const char *MakeFlagsEnv = getenv("MAKEFLAGS"); + if (!MakeFlagsEnv) { + errs() << "Warning: failed to create jobserver client due to MAKEFLAGS " + "environment variable not found\n"; + return; + } + + LLVM_DEBUG(dbgs() << "Found MAKEFLAGS = \"" << MakeFlagsEnv << "\"\n"); + + auto ConfigOrErr = parseNativeMakeFlags(MakeFlagsEnv); + if (Error Err = ConfigOrErr.takeError()) { + errs() << "Warning: failed to create jobserver client due to invalid " + "MAKEFLAGS environment variable: " + << toString(std::move(Err)) << "\n"; + return; + } + + JobserverConfig Config = *ConfigOrErr; + if (Config.TheMode == JobserverConfig::None) { + errs() << "Warning: failed to create jobserver client due to jobserver " + "mode missing in MAKEFLAGS environment variable\n"; + return; + } + + if (Config.TheMode == JobserverConfig::PosixPipe) { +#if defined(LLVM_ON_UNIX) + if (!areFdsValid(Config.PipeFDs.Read, Config.PipeFDs.Write)) { + errs() << "Warning: failed to create jobserver client due to invalid " + "Pipe FDs in MAKEFLAGS environment variable\n"; + return; + } +#endif + } + + auto Client = std::make_unique<JobserverClientImpl>(Config); + if (Client->isValid()) { + LLVM_DEBUG(dbgs() << "Jobserver client created successfully!\n"); + GJobserver = Client.release(); + } else + errs() << "Warning: jobserver client initialization failed.\n"; + }); + return GJobserver; +} + +/// For testing purposes only. This function resets the singleton instance by +/// destroying the existing client and re-initializing the `std::once_flag`. +/// This allows tests to simulate the first-time initialization of the +/// jobserver client multiple times. +void JobserverClient::resetForTesting() { + delete GJobserver; + GJobserver = nullptr; + // Re-construct the std::once_flag in place to reset the singleton state. + new (&GJobserverOnceFlag) std::once_flag(); +} +} // namespace llvm diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 3ac6fc7..8e0c724 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -7,12 +7,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Parallel.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/ExponentialBackoff.h" +#include "llvm/Support/Jobserver.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Threading.h" #include <atomic> #include <future> +#include <memory> +#include <mutex> #include <thread> #include <vector> @@ -49,6 +54,9 @@ public: class ThreadPoolExecutor : public Executor { public: explicit ThreadPoolExecutor(ThreadPoolStrategy S) { + if (S.UseJobserver) + TheJobserver = JobserverClient::getInstance(); + ThreadCount = S.compute_thread_count(); // Spawn all but one of the threads in another thread as spawning threads // can take a while. @@ -69,6 +77,10 @@ public: }); } + // To make sure the thread pool executor can only be created with a parallel + // strategy. + ThreadPoolExecutor() = delete; + void stop() { { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,15 +123,62 @@ private: void work(ThreadPoolStrategy S, unsigned ThreadID) { threadIndex = ThreadID; S.apply_thread_strategy(ThreadID); + // Note on jobserver deadlock avoidance: + // GNU Make grants each invoked process one implicit job slot. Our + // JobserverClient models this by returning an implicit JobSlot on the + // first successful tryAcquire() in a process. This guarantees forward + // progress without requiring a dedicated "always-on" thread here. + + static thread_local std::unique_ptr<ExponentialBackoff> Backoff; + while (true) { - std::unique_lock<std::mutex> Lock(Mutex); - Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); - if (Stop) - break; - auto Task = std::move(WorkStack.back()); - WorkStack.pop_back(); - Lock.unlock(); - Task(); + if (TheJobserver) { + // Jobserver-mode scheduling: + // - Acquire one job slot (with exponential backoff to avoid busy-wait). + // - While holding the slot, drain and run tasks from the local queue. + // - Release the slot when the queue is empty or when shutting down. + // Rationale: Holding a slot amortizes acquire/release overhead over + // multiple tasks and avoids requeue/yield churn, while still enforcing + // the jobserver’s global concurrency limit. With K available slots, + // up to K workers run tasks in parallel; within each worker tasks run + // sequentially until the local queue is empty. + ExponentialBackoff Backoff(std::chrono::hours(24)); + JobSlot Slot; + do { + if (Stop) + return; + Slot = TheJobserver->tryAcquire(); + if (Slot.isValid()) + break; + } while (Backoff.waitForNextAttempt()); + + auto SlotReleaser = llvm::make_scope_exit( + [&] { TheJobserver->release(std::move(Slot)); }); + + while (true) { + std::function<void()> Task; + { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop && WorkStack.empty()) + return; + if (WorkStack.empty()) + break; + Task = std::move(WorkStack.back()); + WorkStack.pop_back(); + } + Task(); + } + } else { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop) + break; + auto Task = std::move(WorkStack.back()); + WorkStack.pop_back(); + Lock.unlock(); + Task(); + } } } @@ -130,9 +189,20 @@ private: std::promise<void> ThreadsCreated; std::vector<std::thread> Threads; unsigned ThreadCount; + + JobserverClient *TheJobserver = nullptr; }; -Executor *Executor::getDefaultExecutor() { +// A global raw pointer to the executor. Lifetime is managed by the +// objects created within createExecutor(). +static Executor *TheExec = nullptr; +static std::once_flag Flag; + +// This function will be called exactly once to create the executor. +// It contains the necessary platform-specific logic. Since functions +// called by std::call_once cannot return value, we have to set the +// executor as a global variable. +void createExecutor() { #ifdef _WIN32 // The ManagedStatic enables the ThreadPoolExecutor to be stopped via // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This @@ -156,16 +226,22 @@ Executor *Executor::getDefaultExecutor() { ThreadPoolExecutor::Deleter> ManagedExec; static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec)); - return Exec.get(); + TheExec = Exec.get(); #else // ManagedStatic is not desired on other platforms. When `Exec` is destroyed // by llvm_shutdown(), worker threads will clean up and invoke TLS // destructors. This can lead to race conditions if other threads attempt to // access TLS objects that have already been destroyed. static ThreadPoolExecutor Exec(strategy); - return &Exec; + TheExec = &Exec; #endif } + +Executor *Executor::getDefaultExecutor() { + // Use std::call_once to lazily and safely initialize the executor. + std::call_once(Flag, createExecutor); + return TheExec; +} } // namespace } // namespace detail diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index c304f0f..6960268 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// // +// // This file implements a crude C++11 based thread pool. // //===----------------------------------------------------------------------===// @@ -14,6 +15,8 @@ #include "llvm/Config/llvm-config.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/ExponentialBackoff.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" @@ -33,7 +36,10 @@ ThreadPoolInterface::~ThreadPoolInterface() = default; #if LLVM_ENABLE_THREADS StdThreadPool::StdThreadPool(ThreadPoolStrategy S) - : Strategy(S), MaxThreadCount(S.compute_thread_count()) {} + : Strategy(S), MaxThreadCount(S.compute_thread_count()) { + if (Strategy.UseJobserver) + TheJobserver = JobserverClient::getInstance(); +} void StdThreadPool::grow(int requested) { llvm::sys::ScopedWriter LockGuard(ThreadsLock); @@ -45,7 +51,15 @@ void StdThreadPool::grow(int requested) { Threads.emplace_back([this, ThreadID] { set_thread_name(formatv("llvm-worker-{0}", ThreadID)); Strategy.apply_thread_strategy(ThreadID); - processTasks(nullptr); + // Note on jobserver deadlock avoidance: + // GNU Make grants each invoked process one implicit job slot. + // JobserverClient::tryAcquire() returns that implicit slot on the first + // successful call in a process, ensuring forward progress without a + // dedicated "always-on" thread. + if (TheJobserver) + processTasksWithJobserver(); + else + processTasks(nullptr); }); } } @@ -133,6 +147,96 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) { } } +/// Main loop for worker threads when using a jobserver. +/// This function uses a two-level queue; it first acquires a job slot from the +/// external jobserver, then retrieves a task from the internal queue. +/// This allows the thread pool to cooperate with build systems like `make -j`. +void StdThreadPool::processTasksWithJobserver() { + while (true) { + // Acquire a job slot from the external jobserver. + // This polls for a slot and yields the thread to avoid a high-CPU wait. + JobSlot Slot; + // The timeout for the backoff can be very long, as the shutdown + // is checked on each iteration. The sleep duration is capped by MaxWait + // in ExponentialBackoff, so shutdown latency is not a problem. + ExponentialBackoff Backoff(std::chrono::hours(24)); + bool AcquiredToken = false; + do { + // Return if the thread pool is shutting down. + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + if (!EnableFlag) + return; + } + + Slot = TheJobserver->tryAcquire(); + if (Slot.isValid()) { + AcquiredToken = true; + break; + } + } while (Backoff.waitForNextAttempt()); + + if (!AcquiredToken) { + // This is practically unreachable with a 24h timeout and indicates a + // deeper problem if hit. + report_fatal_error("Timed out waiting for jobserver token."); + } + + // `make_scope_exit` guarantees the job slot is released, even if the + // task throws or we exit early. This prevents deadlocking the build. + auto SlotReleaser = + make_scope_exit([&] { TheJobserver->release(std::move(Slot)); }); + + // While we hold a job slot, process tasks from the internal queue. + while (true) { + std::function<void()> Task; + ThreadPoolTaskGroup *GroupOfTask = nullptr; + + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + + // Wait until a task is available or the pool is shutting down. + QueueCondition.wait(LockGuard, + [&] { return !EnableFlag || !Tasks.empty(); }); + + // If shutting down and the queue is empty, the thread can terminate. + if (!EnableFlag && Tasks.empty()) + return; + + // If the queue is empty, we're done processing tasks for now. + // Break the inner loop to release the job slot. + if (Tasks.empty()) + break; + + // A task is available. Mark it as active before releasing the lock + // to prevent race conditions with `wait()`. + ++ActiveThreads; + Task = std::move(Tasks.front().first); + GroupOfTask = Tasks.front().second; + if (GroupOfTask != nullptr) + ++ActiveGroups[GroupOfTask]; + Tasks.pop_front(); + } // The queue lock is released. + + // Run the task. The job slot remains acquired during execution. + Task(); + + // The task has finished. Update the active count and notify any waiters. + { + std::lock_guard<std::mutex> LockGuard(QueueLock); + --ActiveThreads; + if (GroupOfTask != nullptr) { + auto A = ActiveGroups.find(GroupOfTask); + if (--(A->second) == 0) + ActiveGroups.erase(A); + } + // If all tasks are complete, notify any waiting threads. + if (workCompletedUnlocked(nullptr)) + CompletionCondition.notify_all(); + } + } + } +} bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const { if (Group == nullptr) return !ActiveThreads && Tasks.empty(); diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp index 693de0e..9da357a 100644 --- a/llvm/lib/Support/Threading.cpp +++ b/llvm/lib/Support/Threading.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Threading.h" #include "llvm/Config/config.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Jobserver.h" #include <cassert> #include <optional> @@ -51,6 +52,10 @@ int llvm::get_physical_cores() { return -1; } static int computeHostNumHardwareThreads(); unsigned llvm::ThreadPoolStrategy::compute_thread_count() const { + if (UseJobserver) + if (auto JS = JobserverClient::getInstance()) + return JS->getNumJobs(); + int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores(); if (MaxThreadCount <= 0) diff --git a/llvm/lib/Support/Unix/Jobserver.inc b/llvm/lib/Support/Unix/Jobserver.inc new file mode 100644 index 0000000..53bf7f2 --- /dev/null +++ b/llvm/lib/Support/Unix/Jobserver.inc @@ -0,0 +1,195 @@ +//===- llvm/Support/Unix/Jobserver.inc - Unix Jobserver Impl ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the UNIX-specific parts of the JobserverClient class. +// +//===----------------------------------------------------------------------===// + +#include <atomic> +#include <cassert> +#include <cerrno> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +namespace { +/// Returns true if the given file descriptor is a FIFO (named pipe). +bool isFifo(int FD) { + struct stat StatBuf; + if (::fstat(FD, &StatBuf) != 0) + return false; + return S_ISFIFO(StatBuf.st_mode); +} + +/// Returns true if the given file descriptors are valid. +bool areFdsValid(int ReadFD, int WriteFD) { + if (ReadFD == -1 || WriteFD == -1) + return false; + // Check if the file descriptors are actually valid by checking their flags. + return ::fcntl(ReadFD, F_GETFD) != -1 && ::fcntl(WriteFD, F_GETFD) != -1; +} +} // namespace + +/// The constructor sets up the client based on the provided configuration. +/// For pipe-based jobservers, it duplicates the inherited file descriptors, +/// sets them to close-on-exec, and makes the read descriptor non-blocking. +/// For FIFO-based jobservers, it opens the named pipe. After setup, it drains +/// all available tokens from the jobserver to determine the total number of +/// available jobs (`NumJobs`), then immediately releases them back. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) { + switch (Config.TheMode) { + case JobserverConfig::PosixPipe: { + // Duplicate the read and write file descriptors. + int NewReadFD = ::dup(Config.PipeFDs.Read); + if (NewReadFD < 0) + return; + int NewWriteFD = ::dup(Config.PipeFDs.Write); + if (NewWriteFD < 0) { + ::close(NewReadFD); + return; + } + // Set the new descriptors to be closed automatically on exec(). + if (::fcntl(NewReadFD, F_SETFD, FD_CLOEXEC) == -1 || + ::fcntl(NewWriteFD, F_SETFD, FD_CLOEXEC) == -1) { + ::close(NewReadFD); + ::close(NewWriteFD); + return; + } + // Set the read descriptor to non-blocking. + int flags = ::fcntl(NewReadFD, F_GETFL, 0); + if (flags == -1 || ::fcntl(NewReadFD, F_SETFL, flags | O_NONBLOCK) == -1) { + ::close(NewReadFD); + ::close(NewWriteFD); + return; + } + ReadFD = NewReadFD; + WriteFD = NewWriteFD; + break; + } + case JobserverConfig::PosixFifo: + // Open the FIFO for reading. It must be non-blocking and close-on-exec. + ReadFD = ::open(Config.Path.c_str(), O_RDONLY | O_NONBLOCK | O_CLOEXEC); + if (ReadFD < 0 || !isFifo(ReadFD)) { + if (ReadFD >= 0) + ::close(ReadFD); + ReadFD = -1; + return; + } + FifoPath = Config.Path; + // The write FD is opened on-demand in release(). + WriteFD = -1; + break; + default: + return; + } + + IsInitialized = true; + // Determine the total number of jobs by acquiring all available slots and + // then immediately releasing them. + SmallVector<JobSlot, 8> Slots; + while (true) { + auto S = tryAcquire(); + if (!S.isValid()) + break; + Slots.push_back(std::move(S)); + } + NumJobs = Slots.size(); + assert(NumJobs >= 1 && "Invalid number of jobs"); + for (auto &S : Slots) + release(std::move(S)); +} + +/// The destructor closes any open file descriptors. +JobserverClientImpl::~JobserverClientImpl() { + if (ReadFD >= 0) + ::close(ReadFD); + if (WriteFD >= 0) + ::close(WriteFD); +} + +/// Tries to acquire a job slot. The first call to this function will always +/// successfully acquire the single "implicit" slot that is granted to every +/// process started by `make`. Subsequent calls attempt to read a one-byte +/// token from the jobserver's read pipe. A successful read grants one +/// explicit job slot. The read is non-blocking; if no token is available, +/// it fails and returns an invalid JobSlot. +JobSlot JobserverClientImpl::tryAcquire() { + if (!IsInitialized) + return JobSlot(); + + // The first acquisition is always for the implicit slot. + if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) { + LLVM_DEBUG(dbgs() << "Acquired implicit job slot.\n"); + return JobSlot::createImplicit(); + } + + char Token; + ssize_t Ret; + LLVM_DEBUG(dbgs() << "Attempting to read token from FD " << ReadFD << ".\n"); + // Loop to retry on EINTR (interrupted system call). + do { + Ret = ::read(ReadFD, &Token, 1); + } while (Ret < 0 && errno == EINTR); + + if (Ret == 1) { + LLVM_DEBUG(dbgs() << "Acquired explicit token '" << Token << "'.\n"); + return JobSlot::createExplicit(static_cast<uint8_t>(Token)); + } + + LLVM_DEBUG(dbgs() << "Failed to acquire job slot, read returned " << Ret + << ".\n"); + return JobSlot(); +} + +/// Releases a job slot back to the pool. If the slot is implicit, it simply +/// resets a flag. If the slot is explicit, it writes the character token +/// associated with the slot back into the jobserver's write pipe. For FIFO +/// jobservers, this may require opening the FIFO for writing if it hasn't +/// been already. +void JobserverClientImpl::release(JobSlot Slot) { + if (!Slot.isValid()) + return; + + // Releasing the implicit slot just makes it available for the next acquire. + if (Slot.isImplicit()) { + LLVM_DEBUG(dbgs() << "Released implicit job slot.\n"); + [[maybe_unused]] bool was_already_released = + HasImplicitSlot.exchange(true, std::memory_order_release); + assert(!was_already_released && "Implicit slot released twice"); + return; + } + + uint8_t Token = Slot.getExplicitValue(); + LLVM_DEBUG(dbgs() << "Releasing explicit token '" << (char)Token << "' to FD " + << WriteFD << ".\n"); + + // For FIFO-based jobservers, the write FD might not be open yet. + // Open it on the first release. + if (WriteFD < 0) { + LLVM_DEBUG(dbgs() << "WriteFD is invalid, opening FIFO: " << FifoPath + << "\n"); + WriteFD = ::open(FifoPath.c_str(), O_WRONLY | O_CLOEXEC); + if (WriteFD < 0) { + LLVM_DEBUG(dbgs() << "Failed to open FIFO for writing.\n"); + return; + } + LLVM_DEBUG(dbgs() << "Opened FIFO as new WriteFD: " << WriteFD << "\n"); + } + + ssize_t Written; + // Loop to retry on EINTR (interrupted system call). + do { + Written = ::write(WriteFD, &Token, 1); + } while (Written < 0 && errno == EINTR); + + if (Written <= 0) { + LLVM_DEBUG(dbgs() << "Failed to write token to pipe, write returned " + << Written << "\n"); + } +} diff --git a/llvm/lib/Support/Windows/Jobserver.inc b/llvm/lib/Support/Windows/Jobserver.inc new file mode 100644 index 0000000..79028ee --- /dev/null +++ b/llvm/lib/Support/Windows/Jobserver.inc @@ -0,0 +1,79 @@ +//==- llvm/Support/Windows/Jobserver.inc - Windows Jobserver Impl -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Windows-specific parts of the JobserverClient class. +// On Windows, the jobserver is implemented using a named semaphore. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Windows/WindowsSupport.h" +#include <atomic> +#include <cassert> + +namespace llvm { +/// The constructor for the Windows jobserver client. It attempts to open a +/// handle to an existing named semaphore, the name of which is provided by +/// GNU make in the --jobserver-auth argument. If the semaphore is opened +/// successfully, the client is marked as initialized. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) { + Semaphore = (void *)::OpenSemaphoreA(SEMAPHORE_MODIFY_STATE | SYNCHRONIZE, + FALSE, Config.Path.c_str()); + if (Semaphore != nullptr) + IsInitialized = true; +} + +/// The destructor closes the handle to the semaphore, releasing the resource. +JobserverClientImpl::~JobserverClientImpl() { + if (Semaphore != nullptr) + ::CloseHandle((HANDLE)Semaphore); +} + +/// Tries to acquire a job slot. The first call always returns the implicit +/// slot. Subsequent calls use a non-blocking wait on the semaphore +/// (`WaitForSingleObject` with a timeout of 0). If the wait succeeds, the +/// semaphore's count is decremented, and an explicit job slot is acquired. +/// If the wait times out, it means no slots are available, and an invalid +/// slot is returned. +JobSlot JobserverClientImpl::tryAcquire() { + if (!IsInitialized) + return JobSlot(); + + // First, grant the implicit slot. + if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) { + return JobSlot::createImplicit(); + } + + // Try to acquire a slot from the semaphore without blocking. + if (::WaitForSingleObject((HANDLE)Semaphore, 0) == WAIT_OBJECT_0) { + // The explicit token value is arbitrary on Windows, as the semaphore + // count is the real resource. + return JobSlot::createExplicit(1); + } + + return JobSlot(); // Invalid slot +} + +/// Releases a job slot back to the pool. If the slot is implicit, it simply +/// resets a flag. For an explicit slot, it increments the semaphore's count +/// by one using `ReleaseSemaphore`, making the slot available to other +/// processes. +void JobserverClientImpl::release(JobSlot Slot) { + if (!IsInitialized || !Slot.isValid()) + return; + + if (Slot.isImplicit()) { + [[maybe_unused]] bool was_already_released = + HasImplicitSlot.exchange(true, std::memory_order_release); + assert(!was_already_released && "Implicit slot released twice"); + return; + } + + // Release the slot by incrementing the semaphore count. + (void)::ReleaseSemaphore((HANDLE)Semaphore, 1, NULL); +} +} // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5630580..f98e312 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -367,19 +367,6 @@ def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> { let BaseClassOrder = 10000; } -def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> { - let CopyCost = 1; - let isAllocatable = 0; - let HasSGPR = 1; -} - -def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> { - let CopyCost = 1; - let Size = 16; - let isAllocatable = 0; - let HasSGPR = 1; -} - // TODO: Do we need to set DwarfRegAlias on register tuples? def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, @@ -797,7 +784,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16, SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16, - EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16, + EXEC_LO_LO16, EXEC_HI_LO16, M0_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> { let Size = 16; let isAllocatable = 0; @@ -805,7 +792,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, } def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, - (add SReg_32_XM0_XEXEC, M0_CLASS)> { + (add SReg_32_XM0_XEXEC, M0)> { let AllocationPriority = 0; } @@ -830,7 +817,7 @@ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, - (add SReg_32_XM0, M0_CLASS)> { + (add SReg_32_XM0, M0)> { let AllocationPriority = 0; let HasSGPR = 1; let BaseClassOrder = 32; diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index ebd957c..e8c849e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -195,9 +195,9 @@ class SPIRVLegalizePointerCast : public FunctionPass { if (DstType->getElementType() != SrcType->getElementType()) { // Support bitcast between vectors of different sizes only if // the total bitwidth is the same. - auto dstBitWidth = + [[maybe_unused]] auto dstBitWidth = DstType->getElementType()->getScalarSizeInBits() * dstNumElements; - auto srcBitWidth = + [[maybe_unused]] auto srcBitWidth = SrcType->getElementType()->getScalarSizeInBits() * srcNumElements; assert(dstBitWidth == srcBitWidth && "Unsupported bitcast between vectors of different sizes."); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index ddb95a4..faeab95 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -40,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend, "Number of function clones created during ThinLTO backend"); STATISTIC(FunctionsClonedThinBackend, "Number of functions that had clones created during ThinLTO backend"); +STATISTIC( + FunctionCloneDuplicatesThinBackend, + "Number of function clone duplicates detected during ThinLTO backend"); STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " "cloned) during whole program analysis"); STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " @@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { return Changed; } +// Compute a SHA1 hash of the callsite and alloc version information of clone I +// in the summary, to use in detection of duplicate clones. +uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) { + SHA1 Hasher; + // Update hash with any callsites that call non-default (non-zero) callee + // versions. + for (auto &SN : FS->callsites()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert( + SN.Clones.size() > I && + "Callsite summary has fewer entries than other summaries in function"); + if (SN.Clones.size() <= I || !SN.Clones[I]) + continue; + uint8_t Data[sizeof(SN.Clones[I])]; + support::endian::write32le(Data, SN.Clones[I]); + Hasher.update(Data); + } + // Update hash with any allocs that have non-default (non-None) hints. + for (auto &AN : FS->allocs()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert(AN.Versions.size() > I && + "Alloc summary has fewer entries than other summaries in function"); + if (AN.Versions.size() <= I || + (AllocationType)AN.Versions[I] == AllocationType::None) + continue; + Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1)); + } + return support::endian::read64le(Hasher.result().data()); +} + static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> - &FuncToAliasMap) { + &FuncToAliasMap, + FunctionSummary *FS) { + auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) { + // We might have created this when adjusting callsite in another + // function. It should be a declaration. + assert(DeclGV->isDeclaration()); + NewGV->takeName(DeclGV); + DeclGV->replaceAllUsesWith(NewGV); + DeclGV->eraseFromParent(); + }; + + // Handle aliases to this function, and create analogous alias clones to the + // provided clone of this function. + auto CloneFuncAliases = [&](Function *NewF, unsigned I) { + if (!FuncToAliasMap.count(&F)) + return; + for (auto *A : FuncToAliasMap[&F]) { + std::string AliasName = getMemProfFuncName(A->getName(), I); + auto *PrevA = M.getNamedAlias(AliasName); + auto *NewA = GlobalAlias::create(A->getValueType(), + A->getType()->getPointerAddressSpace(), + A->getLinkage(), AliasName, NewF); + NewA->copyAttributesFrom(A); + if (PrevA) + TakeDeclNameAndReplace(PrevA, NewA); + } + }; + // The first "clone" is the original copy, we should only call this if we // needed to create new clones. assert(NumClones > 1); SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; VMaps.reserve(NumClones - 1); FunctionsClonedThinBackend++; + + // Map of hash of callsite/alloc versions to the instantiated function clone + // (possibly the original) implementing those calls. Used to avoid + // instantiating duplicate function clones. + // FIXME: Ideally the thin link would not generate such duplicate clones to + // start with, but right now it happens due to phase ordering in the function + // assignment and possible new clones that produces. We simply make each + // duplicate an alias to the matching instantiated clone recorded in the map + // (except for available_externally which are made declarations as they would + // be aliases in the prevailing module, and available_externally aliases are + // not well supported right now). + DenseMap<uint64_t, Function *> HashToFunc; + + // Save the hash of the original function version. + HashToFunc[ComputeHash(FS, 0)] = &F; + for (unsigned I = 1; I < NumClones; I++) { VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); + std::string Name = getMemProfFuncName(F.getName(), I); + auto Hash = ComputeHash(FS, I); + // If this clone would duplicate a previously seen clone, don't generate the + // duplicate clone body, just make an alias to satisfy any (potentially + // cross-module) references. + if (HashToFunc.contains(Hash)) { + FunctionCloneDuplicatesThinBackend++; + auto *Func = HashToFunc[Hash]; + if (Func->hasAvailableExternallyLinkage()) { + // Skip these as EliminateAvailableExternallyPass does not handle + // available_externally aliases correctly and we end up with an + // available_externally alias to a declaration. Just create a + // declaration for now as we know we will have a definition in another + // module. + auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone decl " << ore::NV("Decl", Decl.getCallee())); + continue; + } + auto *PrevF = M.getFunction(Name); + auto *Alias = GlobalAlias::create(Name, Func); + if (PrevF) + TakeDeclNameAndReplace(PrevF, Alias); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone alias " << ore::NV("Alias", Alias)); + + // Now handle aliases to this function, and clone those as well. + CloneFuncAliases(Func, I); + continue; + } auto *NewF = CloneFunction(&F, *VMaps.back()); + HashToFunc[Hash] = NewF; FunctionClonesThinBackend++; // Strip memprof and callsite metadata from clone as they are no longer // needed. @@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Inst.setMetadata(LLVMContext::MD_callsite, nullptr); } } - std::string Name = getMemProfFuncName(F.getName(), I); auto *PrevF = M.getFunction(Name); - if (PrevF) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevF->isDeclaration()); - NewF->takeName(PrevF); - PrevF->replaceAllUsesWith(NewF); - PrevF->eraseFromParent(); - } else + if (PrevF) + TakeDeclNameAndReplace(PrevF, NewF); + else NewF->setName(Name); updateSubprogramLinkageName(NewF, Name); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) << "created clone " << ore::NV("NewFunction", NewF)); // Now handle aliases to this function, and clone those as well. - if (!FuncToAliasMap.count(&F)) - continue; - for (auto *A : FuncToAliasMap[&F]) { - std::string Name = getMemProfFuncName(A->getName(), I); - auto *PrevA = M.getNamedAlias(Name); - auto *NewA = GlobalAlias::create(A->getValueType(), - A->getType()->getPointerAddressSpace(), - A->getLinkage(), Name, NewF); - NewA->copyAttributesFrom(A); - if (PrevA) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevA->isDeclaration()); - NewA->takeName(PrevA); - PrevA->replaceAllUsesWith(NewA); - PrevA->eraseFromParent(); - } - } + CloneFuncAliases(NewF, I); } return VMaps; } @@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; bool ClonesCreated = false; unsigned NumClonesCreated = 0; - auto CloneFuncIfNeeded = [&](unsigned NumClones) { + auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) { // We should at least have version 0 which is the original copy. assert(NumClones > 0); // If only one copy needed use original. @@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { assert(NumClonesCreated == NumClones); return; } - VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); + VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS); // The first "clone" is the original copy, which doesn't have a VMap. assert(VMaps.size() == NumClones - 1); Changed = true; @@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { }; auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, - Function *CalledFunction) { + Function *CalledFunction, FunctionSummary *FS) { // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS); assert(!isMemProfClone(*CalledFunction)); @@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // below. auto CalleeOrigName = CalledFunction->getName(); for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Do nothing if this version calls the original version of its // callee. if (!StackNode.Clones[J]) @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { #endif // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); + CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS); OrigAllocsThinBackend++; AllocVersionsThinBackend += AllocNode.Versions.size(); @@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Update the allocation types per the summary info. for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and + // was created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Ignore any that didn't get an assigned allocation type. if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) continue; @@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // we don't need to do ICP, but might need to clone this // function as it is the target of other cloned calls. if (NumClones) - CloneFuncIfNeeded(NumClones); + CloneFuncIfNeeded(NumClones, FS); } else { @@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - CloneCallsite(StackNode, CB, CalledFunction); + CloneCallsite(StackNode, CB, CalledFunction, FS); } } else if (CB->isTailCall() && CalledFunction) { // Locate the synthesized callsite info for the callee VI, if any was @@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); assert(Callsite != MapTailCallCalleeVIToCallsite.end()); - CloneCallsite(Callsite->second, CB, CalledFunction); + CloneCallsite(Callsite->second, CB, CalledFunction, FS); } } } @@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP( // check. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); @@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP( // TotalCount and the number promoted. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); diff --git a/llvm/test/CodeGen/AMDGPU/.#llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/.#llvm.amdgcn.smfmac.gfx950.ll new file mode 120000 index 0000000..8747bd5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/.#llvm.amdgcn.smfmac.gfx950.ll @@ -0,0 +1 @@ +matt@mattbookAMD.56897
\ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 2cde060..a54dc9d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2031627 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2031627 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] @@ -94,7 +94,7 @@ entry: define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %8 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -106,7 +106,7 @@ entry: define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2424842 /* regdef:SReg_32 */, def %8 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -119,7 +119,7 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %8, 2031626 /* regdef:VGPR_32 */, def %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 1835018 /* regdef:VGPR_32 */, def %9 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] @@ -136,7 +136,7 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %8, 3735562 /* regdef:VReg_64 */, def %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 3473418 /* regdef:VReg_64 */, def %9 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) @@ -171,7 +171,7 @@ define amdgpu_kernel void @test_input_vgpr_imm() { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) ret void @@ -185,7 +185,7 @@ define amdgpu_kernel void @test_input_sgpr_imm() { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) ret void @@ -212,7 +212,7 @@ define float @test_input_vgpr(i32 %src) nounwind { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %9, 2031625 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 1835017 /* reguse:VGPR_32 */, [[COPY1]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -227,7 +227,7 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2031626 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -244,7 +244,7 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -256,13 +256,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2424842 /* regdef:SReg_32 */, def %8 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %10 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2424842 /* regdef:SReg_32 */, def %10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %12, 2621449 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2424842 /* regdef:SReg_32 */, def %12, 2424841 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -285,7 +285,7 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %11, 2031626 /* regdef:VGPR_32 */, def %12, 2031626 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 1835018 /* regdef:VGPR_32 */, def %12, 1835018 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13 @@ -306,10 +306,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2621450 /* regdef:SReg_32 */, def %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2424842 /* regdef:SReg_32 */, def %8 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-ignore-copies-crash.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-ignore-copies-crash.mir index f2d3272..137488f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-ignore-copies-crash.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-ignore-copies-crash.mir @@ -24,7 +24,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %5(s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %5(s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[FMUL]], %5, [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) @@ -33,7 +33,7 @@ body: | %2:vgpr(s32) = COPY %1(s32) %3:vgpr(s32) = G_FMUL %0, %2 %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %5:vgpr_32 + INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %5:vgpr_32 %6:vgpr(s32) = COPY %4(s32) %7:vgpr(s32) = nnan G_AMDGPU_FMED3 %3(s32), %5(s32), %6(s32) $vgpr0 = COPY %7(s32) diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir index 035354c..8a39d9c 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir @@ -68,7 +68,7 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000) ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2621450 /* regdef:SReg_32 */, def renamable $sgpr4 + ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2424842 /* regdef:SReg_32 */, def renamable $sgpr4 ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -149,7 +149,7 @@ body: | successors: %bb.3(0x04000000), %bb.2(0x7c000000) liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 - INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2621450 /* regdef:SReg_32 */, def renamable $sgpr4 + INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2424842 /* regdef:SReg_32 */, def renamable $sgpr4 S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc S_CBRANCH_SCC1 %bb.2, implicit killed $scc diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir index 584d6a5..a2f0205 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir @@ -69,7 +69,7 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000) ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2621450 /* regdef:SReg_32 */, def renamable $sgpr4 + ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2424842 /* regdef:SReg_32 */, def renamable $sgpr4 ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -151,7 +151,7 @@ body: | successors: %bb.3(0x04000000), %bb.2(0x7c000000) liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 - INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2621450 /* regdef:SReg_32 */, def renamable $sgpr4 + INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2424842 /* regdef:SReg_32 */, def renamable $sgpr4 S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc S_CBRANCH_SCC1 %bb.2, implicit killed $scc diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir index c475efb..52cdf5a 100644 --- a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir +++ b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir @@ -20,13 +20,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_64 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:areg_64 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 undef %2.sub0:areg_64 = COPY %0 %2.sub1:areg_64 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 SI_RETURN ... @@ -45,13 +45,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 undef %2.sub0:areg_64_align2 = COPY %0 %2.sub1:areg_64_align2 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -72,7 +72,7 @@ body: | ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_96 = COPY [[COPY]] ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_96 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2:areg_96 = COPY [[COPY2]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY3]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 @@ -80,7 +80,7 @@ body: | undef %3.sub0:areg_96 = COPY %0 %3.sub1:areg_96 = COPY %1 %3.sub2:areg_96 = COPY %2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %3 SI_RETURN ... @@ -101,7 +101,7 @@ body: | ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY2]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, [[COPY3]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 @@ -109,7 +109,7 @@ body: | undef %3.sub0:areg_96_align2 = COPY %0 %3.sub1:areg_96_align2 = COPY %1 %3.sub2:areg_96_align2 = COPY %2 - INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, %3 SI_RETURN ... @@ -128,13 +128,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 undef %2.sub0_sub1:areg_128 = COPY %0 %2.sub2_sub3:areg_128 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, killed %2 SI_RETURN ... @@ -153,13 +153,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 undef %2.sub0_sub1:areg_128_align2 = COPY %0 %2.sub2_sub3:areg_128_align2 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %2 SI_RETURN ... @@ -178,13 +178,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr9 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:sgpr_32 = COPY $sgpr8 %1:sgpr_32 = COPY $sgpr9 undef %2.sub0:areg_64_align2 = COPY %0 %2.sub1:areg_64_align2 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -203,13 +203,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_96 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vreg_64 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96 = COPY %0 %2.sub1_sub2:areg_96 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -228,13 +228,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1_sub2:areg_96_align2 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vreg_64 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96_align2 = COPY %0 %2.sub1_sub2:areg_96_align2 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -253,13 +253,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:areg_96 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vgpr_32 = COPY $vgpr2 undef %2.sub0_sub1:areg_96 = COPY %0 %2.sub2:areg_96 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -278,13 +278,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_96_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vgpr_32 = COPY $vgpr2 undef %2.sub0_sub1:areg_96_align2 = COPY %0 %2.sub2:areg_96_align2 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -302,12 +302,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %2.sub0:areg_64 = COPY %0 %2.sub1:areg_64 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 SI_RETURN ... @@ -326,13 +326,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 undef %2.sub0:areg_64_align2 = COPY %0 %2.sub1:areg_64_align2 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -350,12 +350,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:areg_96 = COPY %0 %1.sub1:areg_96 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %1 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %1 SI_RETURN ... @@ -373,12 +373,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:areg_96_align2 = COPY %0 %1.sub1:areg_96_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, %1 + INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, %1 SI_RETURN ... @@ -398,14 +398,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:areg_128 = COPY %0 %1.sub1:areg_128 = COPY %0 %1.sub2:areg_128 = COPY %0 %1.sub3:areg_128 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, killed %1 + INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, killed %1 SI_RETURN ... @@ -425,14 +425,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:areg_128_align2 = COPY %0 %1.sub1:areg_128_align2 = COPY %0 %1.sub2:areg_128_align2 = COPY %0 %1.sub3:areg_128_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %1 + INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %1 SI_RETURN ... @@ -451,15 +451,15 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_64 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:areg_64 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY2]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 undef %2.sub0:areg_64 = COPY %0 %2.sub1:areg_64 = COPY %1 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 - INLINEASM &"; use $0", 0 /* attdialect */, 2031625 /* reguse:VGPR_32 */, killed %0 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 1835017 /* reguse:VGPR_32 */, killed %0 SI_RETURN ... @@ -477,14 +477,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:areg_64 = COPY %0 %1.sub1:areg_64 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %1 - INLINEASM &"; use $0", 0 /* attdialect */, 2031625 /* reguse:VGPR_32 */, killed %0 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %1 + INLINEASM &"; use $0", 0 /* attdialect */, 1835017 /* reguse:VGPR_32 */, killed %0 SI_RETURN ... @@ -503,16 +503,16 @@ body: | ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY [[COPY]].sub0 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:VReg_64 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:VReg_64 */, [[COPY]] ; CHECK-NEXT: SI_RETURN %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:areg_64 = COPY %0 %1.sub1:areg_64 = COPY %0 undef %2.sub0:vreg_64 = COPY %0 %2.sub1:vreg_64 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %1 - INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:VReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %1 + INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:VReg_64 */, killed %2 SI_RETURN ... @@ -533,13 +533,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 %0.sub1:vreg_64 = COPY $vgpr1 undef %2.sub0:areg_64 = COPY %0.sub0 %2.sub1:areg_64 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 SI_RETURN ... @@ -558,13 +558,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 %0.sub1:vreg_64 = COPY $vgpr1 undef %2.sub0:areg_64_align2 = COPY %0.sub0 %2.sub1:areg_64_align2 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -585,7 +585,7 @@ body: | ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 =COPY $vgpr0 %0.sub1:vreg_96 = COPY $vgpr1 @@ -593,7 +593,7 @@ body: | undef %3.sub0:areg_96 = COPY %0.sub0 %3.sub1:areg_96 = COPY %0.sub1 %3.sub2:areg_96 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %3 SI_RETURN ... @@ -614,7 +614,7 @@ body: | ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 =COPY $vgpr0 %0.sub1:vreg_96 = COPY $vgpr1 @@ -622,7 +622,7 @@ body: | undef %3.sub0:areg_96_align2 = COPY %0.sub0 %3.sub1:areg_96_align2 = COPY %0.sub1 %3.sub2:areg_96_align2 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, %3 SI_RETURN ... @@ -641,13 +641,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY]].sub2_sub3 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_128 =COPY $vgpr0_vgpr1 %0.sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3 undef %2.sub0_sub1:areg_128 = COPY %0.sub0_sub1 %2.sub2_sub3:areg_128 = COPY %0.sub2_sub3 - INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, killed %2 SI_RETURN ... @@ -668,13 +668,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_128 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_128 =COPY $vgpr0_vgpr1 %0.sub1:vreg_128 = COPY $vgpr2_vgpr3 undef %2.sub0_sub1:areg_128_align2 = COPY %0.sub0 %2.sub2_sub3:areg_128_align2 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %2 SI_RETURN ... @@ -693,13 +693,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:sreg_64 = COPY $sgpr9 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:sreg_64 = COPY $sgpr8 %0.sub1:sreg_64 = COPY $sgpr9 undef %2.sub0:areg_64_align2 = COPY %0.sub0 %2.sub1:areg_64_align2 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -718,13 +718,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY]].sub1_sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 =COPY $vgpr0 %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96 = COPY %0.sub0 %2.sub1_sub2:areg_96 = COPY %0.sub1_sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -743,13 +743,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96_align2 = COPY [[COPY]].sub1_sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 =COPY $vgpr0 %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96_align2 = COPY %0.sub0 %2.sub1_sub2:areg_96_align2 = COPY %0.sub1_sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -768,13 +768,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1 %0.sub2:vreg_96 = COPY $vgpr2 undef %2.sub0_sub1:areg_96 = COPY %0.sub0_sub1 %2.sub2:areg_96 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -793,13 +793,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96_align2 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1 %0.sub2:vreg_96 = COPY $vgpr2 undef %2.sub0_sub1:areg_96_align2 = COPY %0.sub0_sub1 %2.sub2:areg_96_align2 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -817,12 +817,12 @@ body: | ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub0 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 undef %2.sub0:areg_64 = COPY %0.sub0 %2.sub1:areg_64 = COPY %0.sub0 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 SI_RETURN ... @@ -841,13 +841,13 @@ body: | ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub0 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 undef %1.sub0:areg_96 = COPY %0.sub0 %1.sub1:areg_96 = COPY %0.sub0 %1.sub2:areg_96 = COPY %0.sub0 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %1 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %1 SI_RETURN ... @@ -865,12 +865,12 @@ body: | ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub0 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 undef %1.sub0:areg_96_align2 = COPY %0.sub0 %1.sub1:areg_96_align2 = COPY %0.sub0 - INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, %1 + INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, %1 SI_RETURN ... @@ -890,14 +890,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]].sub0 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 undef %1.sub0:areg_128 = COPY %0.sub0 %1.sub1:areg_128 = COPY %0.sub0 %1.sub2:areg_128 = COPY %0.sub0 %1.sub3:areg_128 = COPY %0.sub0 - INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, killed %1 + INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, killed %1 SI_RETURN ... @@ -917,14 +917,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128_align2 = COPY [[COPY]].sub0 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 undef %1.sub0:areg_128_align2 = COPY %0.sub0 %1.sub1:areg_128_align2 = COPY %0.sub0 %1.sub2:areg_128_align2 = COPY %0.sub0 %1.sub3:areg_128_align2 = COPY %0.sub0 - INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %1 + INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %1 SI_RETURN ... @@ -943,13 +943,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64 = COPY $vgpr0 %0.sub1:vreg_64 = COPY $vgpr1 undef %2.sub0:areg_64 = COPY %0.sub0 %2.sub1:areg_64 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 SI_RETURN ... @@ -968,13 +968,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64_align2 = COPY $vgpr1 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_64_align2 = COPY $vgpr0 %0.sub1:vreg_64_align2 = COPY $vgpr1 undef %2.sub0:areg_64_align2 = COPY %0.sub0 %2.sub1:areg_64_align2 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -995,7 +995,7 @@ body: | ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 = COPY $vgpr0 %0.sub1:vreg_96 = COPY $vgpr1 @@ -1003,7 +1003,7 @@ body: | undef %3.sub0:areg_96 = COPY %0.sub0 %3.sub1:areg_96 = COPY %0.sub1 %3.sub2:areg_96 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %3 SI_RETURN ... @@ -1024,7 +1024,7 @@ body: | ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96_align2 = COPY $vgpr0 %0.sub1:vreg_96_align2 = COPY $vgpr1 @@ -1032,7 +1032,7 @@ body: | undef %3.sub0:areg_96_align2 = COPY %0.sub0 %3.sub1:areg_96_align2 = COPY %0.sub1 %3.sub2:areg_96_align2 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, %3 SI_RETURN ... @@ -1051,13 +1051,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY]].sub2_sub3 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_128 = COPY $vgpr0_vgpr1 %0.sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3 undef %2.sub0_sub1:areg_128 = COPY %0.sub0_sub1 %2.sub2_sub3:areg_128 = COPY %0.sub2_sub3 - INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, killed %2 SI_RETURN ... @@ -1076,13 +1076,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2_sub3:vreg_128_align2 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY]].sub2_sub3 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_128_align2 = COPY $vgpr0_vgpr1 %0.sub2_sub3:vreg_128_align2 = COPY $vgpr2_vgpr3 undef %2.sub0_sub1:areg_128_align2 = COPY %0.sub0_sub1 %2.sub2_sub3:areg_128_align2 = COPY %0.sub2_sub3 - INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %2 SI_RETURN ... @@ -1101,13 +1101,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:sreg_64 = COPY $sgpr9 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:sreg_64 = COPY $sgpr8 %0.sub1:sreg_64 = COPY $sgpr9 undef %2.sub0:areg_64_align2 = COPY %0.sub0 %2.sub1:areg_64_align2 = COPY %0.sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -1126,13 +1126,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY]].sub1_sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 = COPY $vgpr0 %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96 = COPY %0.sub0 %2.sub1_sub2:areg_96 = COPY %0.sub1_sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -1150,13 +1150,13 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_96 = COPY [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY2]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY2]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96 = COPY $vgpr0 %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96 = COPY %0.sub2 %2.sub1_sub2:areg_96 = COPY %0.sub0_sub1 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -1176,13 +1176,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96_align2 = COPY $vgpr1_vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96_align2 = COPY [[COPY]].sub1_sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0:vreg_96_align2 = COPY $vgpr0 %0.sub1_sub2:vreg_96_align2 = COPY $vgpr1_vgpr2 undef %2.sub0:areg_96_align2 = COPY %0.sub0 %2.sub1_sub2:areg_96_align2 = COPY %0.sub1_sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -1201,13 +1201,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1 %0.sub2:vreg_96 = COPY $vgpr2 undef %2.sub0_sub1:areg_96 = COPY %0.sub0_sub1 %2.sub2:areg_96 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %2 SI_RETURN ... @@ -1226,13 +1226,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96_align2 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96_align2 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_96_align2 = COPY $vgpr0_vgpr1 %0.sub2:vreg_96_align2 = COPY $vgpr2 undef %2.sub0_sub1:areg_96_align2 = COPY %0.sub0_sub1 %2.sub2:areg_96_align2 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -1251,13 +1251,13 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96_align2 = COPY [[COPY]].sub0_sub1 ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1 %0.sub2:vreg_96 = COPY $vgpr2 undef %2.sub0_sub1:areg_96_align2 = COPY %0.sub0_sub1 %2.sub2:areg_96_align2 = COPY %0.sub2 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -1274,11 +1274,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_64 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_64 = COPY $vgpr0_vgpr1 %2:areg_64 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4325385 /* reguse:AReg_64 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4063241 /* reguse:AReg_64 */, killed %2 SI_RETURN ... @@ -1295,11 +1295,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_64_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %2:areg_64_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -1316,11 +1316,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_96 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 %3:areg_96 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 6422537 /* reguse:AReg_96 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96 */, %3 SI_RETURN ... @@ -1337,11 +1337,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_96_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_96_align2 = COPY $vgpr0_vgpr1_vgpr2 %3:areg_96_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 6750217 /* reguse:AReg_96_Align2 */, %3 + INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_96_Align2 */, %3 SI_RETURN ... @@ -1358,11 +1358,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:areg_128 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 8847369 /* reguse:AReg_128 */, killed %2 + INLINEASM &"; use $0", 0 /* attdialect */, 8585225 /* reguse:AReg_128 */, killed %2 SI_RETURN ... @@ -1379,11 +1379,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_128_align2 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:areg_128_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %2 SI_RETURN ... @@ -1400,11 +1400,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_64_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:sreg_64 = COPY $sgpr8_sgpr9 %2:areg_64_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... @@ -1421,11 +1421,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_96_align2 = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY1]] ; CHECK-NEXT: SI_RETURN %0:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 %2:areg_96_align2 = COPY %0 - INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %2 + INLINEASM &"; use $0", 0 /* attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %2 SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir b/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir index c07c9ef..126cbc6 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir @@ -20,10 +20,10 @@ body: | ; CHECK-LABEL: name: foo1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def undef %2.sub0, 2031627 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 - INLINEASM &"", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %0:vgpr_32, 2031627 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32 + INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32 undef %2.sub0:vreg_64 = COPY killed %0 %2.sub1:vreg_64 = COPY killed %1 FLAT_STORE_DWORDX2 killed $vgpr0_vgpr1, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: foo2 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 2031627 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 2031626 /* regdef:VGPR_32 */, def undef %2.sub0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 - INLINEASM &"", 0 /* attdialect */, 2031627 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32, 2031626 /* regdef:VGPR_32 */, def %0:vgpr_32 + INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32 undef %2.sub0:vreg_64 = COPY killed %0 %2.sub1:vreg_64 = COPY killed %1 FLAT_STORE_DWORDX2 killed $vgpr0_vgpr1, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) @@ -62,10 +62,10 @@ body: | ; CHECK-LABEL: name: foo3 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def undef %2.sub0, 2031627 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 - INLINEASM &"", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %1:vgpr_32, 2031627 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32 + INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32 undef %2.sub0:vreg_64 = COPY killed %1 %2.sub1:vreg_64 = COPY killed %0 FLAT_STORE_DWORDX2 killed $vgpr0_vgpr1, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) @@ -83,10 +83,10 @@ body: | ; CHECK-LABEL: name: foo4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 2031627 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 2031626 /* regdef:VGPR_32 */, def undef %2.sub0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 - INLINEASM &"", 0 /* attdialect */, 2031627 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32, 2031626 /* regdef:VGPR_32 */, def %1:vgpr_32 + INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32 undef %2.sub0:vreg_64 = COPY killed %1 %2.sub1:vreg_64 = COPY killed %0 FLAT_STORE_DWORDX2 killed $vgpr0_vgpr1, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) diff --git a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir index 1635b09..4dfdb56 100644 --- a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir @@ -370,7 +370,7 @@ body: | ; HAZARD-LABEL: name: inline_sdwa_hazard ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode ; HAZARD-NEXT: {{ $}} - ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; HAZARD-NEXT: S_NOP 0 ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) ; HAZARD-NEXT: S_ENDPGM 0 @@ -378,10 +378,10 @@ body: | ; NOHAZARD-LABEL: name: inline_sdwa_hazard ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode ; NOHAZARD-NEXT: {{ $}} - ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) ; NOHAZARD-NEXT: S_ENDPGM 0 - INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) S_ENDPGM 0 ... @@ -397,17 +397,17 @@ body: | ; HAZARD-NEXT: {{ $}} ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) ; HAZARD-NEXT: S_NOP 0 - ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; HAZARD-NEXT: S_ENDPGM 0 ; ; NOHAZARD-LABEL: name: sdwa_inline_hazard ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode ; NOHAZARD-NEXT: {{ $}} ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) - ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; NOHAZARD-NEXT: S_ENDPGM 0 renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) - INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 S_ENDPGM 0 ... @@ -421,19 +421,19 @@ body: | ; HAZARD-LABEL: name: inline_inline_hazard ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode ; HAZARD-NEXT: {{ $}} - ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; HAZARD-NEXT: S_NOP 0 - ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; HAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; HAZARD-NEXT: S_ENDPGM 0 ; ; NOHAZARD-LABEL: name: inline_inline_hazard ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode ; NOHAZARD-NEXT: {{ $}} - ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 - ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 ; NOHAZARD-NEXT: S_ENDPGM 0 - INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 - INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0, 2031625 /* reguse:VGPR_32 */, $vgpr1 + INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 + INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0, 1835017 /* reguse:VGPR_32 */, $vgpr1 S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir index 97f8fd6..6b9b77c 100644 --- a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir @@ -1112,11 +1112,11 @@ body: | ; GCN-NEXT: S_WAITCNT 0 ; GCN-NEXT: renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, killed $vgpr0, 0, killed $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, killed renamable $vgpr2 + ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, killed renamable $vgpr2 ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 S_WAITCNT 0 renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, killed $vgpr0, 0, killed $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, killed renamable $vgpr2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, killed renamable $vgpr2 S_SETPC_B64_return undef $sgpr30_sgpr31 ... diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir index 63db24a..2573acd 100644 --- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir +++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir @@ -486,7 +486,7 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; CHECK-NEXT: INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 40239113 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 39976969 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: S_ENDPGM 0 bb.0: S_NOP 0, implicit-def $agpr0 @@ -516,7 +516,7 @@ body: | S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 - INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 40239113 /* reguse:VReg_512_Align2 */, %0:vreg_512_align2 + INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 39976969 /* reguse:VReg_512_Align2 */, %0:vreg_512_align2 S_ENDPGM 0 ... @@ -1368,7 +1368,7 @@ body: | ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 40239113 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39976969 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} @@ -1408,7 +1408,7 @@ body: | undef %2.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1) early-clobber %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %2, 0, 0, 0, implicit $mode, implicit $exec early-clobber %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 40239113 /* reguse:VReg_512_Align2 */, %4 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39976969 /* reguse:VReg_512_Align2 */, %4 S_CBRANCH_VCCNZ %bb.1, implicit $vcc S_BRANCH %bb.2 @@ -1726,7 +1726,7 @@ body: | ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 40239113 /* reguse:VReg_512_Align2 */, renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39976969 /* reguse:VReg_512_Align2 */, renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} @@ -1763,7 +1763,7 @@ body: | undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1) %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 40239113 /* reguse:VReg_512_Align2 */, %4 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39976969 /* reguse:VReg_512_Align2 */, %4 S_CBRANCH_VCCNZ %bb.1, implicit $vcc S_BRANCH %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll index 5b3e486..32b1b9b 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,16 +8,16 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10682378 /* regdef:SGPR_128 */, def %13 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10420234 /* regdef:SGPR_128 */, def %13 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %13 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 10682377 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 10420233 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10682378 /* regdef:SGPR_128 */, def %11 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10420234 /* regdef:SGPR_128 */, def %11 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %11 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 10682377 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 10420233 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -27,16 +27,16 @@ define amdgpu_kernel void @s_input_output_i128() { define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7929866 /* regdef:VReg_128 */, def %13 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7667722 /* regdef:VReg_128 */, def %13 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %13 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7929865 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7667721 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8257546 /* regdef:VReg_128_Align2 */, def %11 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7995402 /* regdef:VReg_128_Align2 */, def %11 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %11 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 8257545 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7995401 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -47,16 +47,16 @@ define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8847370 /* regdef:AReg_128 */, def %13 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8585226 /* regdef:AReg_128 */, def %13 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %13 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 8847369 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 8585225 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 9568266 /* regdef:AReg_128_Align2 */, def %11 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 9306122 /* regdef:AReg_128_Align2 */, def %11 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %11 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir index 8acf32e..e6a5234 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir @@ -18,21 +18,21 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], 256, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -53,27 +53,27 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], -156, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX12-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 %2:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %2 SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir index 1690826..98b7f4a 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir @@ -21,9 +21,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX803-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_CO_U32_e64_]] - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[V_ADD_CO_U32_e64_]], implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets @@ -31,9 +31,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets @@ -41,10 +41,10 @@ body: | ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets @@ -52,9 +52,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets @@ -62,15 +62,15 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -88,42 +88,42 @@ body: | bb.0: ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc ; GFX803: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc ; GFX900: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc ; GFX942: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc ; GFX10: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc ; GFX12: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1, implicit $vcc + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1, implicit $vcc SI_RETURN ... @@ -144,9 +144,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX803-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_CO_U32_e64_]] - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[V_ADD_CO_U32_e64_]], implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets @@ -154,9 +154,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets @@ -164,10 +164,10 @@ body: | ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets @@ -175,9 +175,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets @@ -185,15 +185,15 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_CO_U32_e32 8, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_CO_U32_e32 16, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -214,9 +214,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX803-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_CO_U32_e64_]] - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[V_ADD_CO_U32_e64_]], 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets @@ -224,9 +224,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets @@ -234,9 +234,9 @@ body: | ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX942-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[S_ADD_I32_]], 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets @@ -244,9 +244,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets @@ -254,14 +254,14 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[S_ADD_I32_]], 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32, dead %2:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -279,42 +279,42 @@ body: | bb.0: ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc ; GFX803: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX803-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc ; GFX900: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX900-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc ; GFX942: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX942-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX12-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] %0:vgpr_32, %2:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN implicit %2 ... @@ -332,42 +332,42 @@ body: | bb.0: ; GFX803-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets ; GFX803: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets ; GFX900: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets ; GFX942: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets ; GFX12: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX12-NEXT: SI_RETURN %0:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %0 %1:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %1 SI_RETURN ... @@ -385,42 +385,42 @@ body: | bb.0: ; GFX803-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets ; GFX803: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets ; GFX900: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets ; GFX942: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets ; GFX12: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX12-NEXT: SI_RETURN %0:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %0 %1:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %1 SI_RETURN ... @@ -443,9 +443,9 @@ body: | ; GFX803-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX803-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX803-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets @@ -454,9 +454,9 @@ body: | ; GFX900-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX900-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets @@ -465,9 +465,9 @@ body: | ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets @@ -476,9 +476,9 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX10-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets @@ -487,17 +487,17 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX12-NEXT: SI_RETURN %0:sreg_32 = COPY $sgpr4 %1:sreg_32 = COPY $sgpr5 %2:sreg_32 = S_ADD_I32 %0, %stack.0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %2 %3:sreg_32 = S_ADD_I32 %1, %stack.0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %3 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %3 SI_RETURN ... @@ -520,9 +520,9 @@ body: | ; GFX803-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX803-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX803-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute @@ -531,9 +531,9 @@ body: | ; GFX900-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX900-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute @@ -542,9 +542,9 @@ body: | ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute @@ -553,9 +553,9 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX10-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute @@ -564,17 +564,17 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX12-NEXT: SI_RETURN %0:sreg_32 = COPY $sgpr4 %1:sreg_32 = COPY $sgpr5 %2:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %2 %3:sreg_32 = S_ADD_I32 %stack.0, %1, implicit-def dead $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %3 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %3 SI_RETURN ... @@ -592,48 +592,48 @@ body: | bb.0: ; GFX803-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc ; GFX803: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX803-NEXT: S_NOP 0, implicit $scc ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX803-NEXT: SI_RETURN implicit $scc ; ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc ; GFX900: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX900-NEXT: S_NOP 0, implicit $scc ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN implicit $scc ; ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc ; GFX942: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX942-NEXT: S_NOP 0, implicit $scc ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX942-NEXT: SI_RETURN implicit $scc ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX10-NEXT: S_NOP 0, implicit $scc ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX10-NEXT: SI_RETURN implicit $scc ; ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc ; GFX12: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_]] ; GFX12-NEXT: S_NOP 0, implicit $scc ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX12-NEXT: SI_RETURN implicit $scc %0:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %0 S_NOP 0, implicit $scc %1:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2621449 /* reguse:SReg_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:SReg_32 */, %1 SI_RETURN implicit $scc ... @@ -656,9 +656,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets @@ -667,9 +667,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets @@ -678,9 +678,9 @@ body: | ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets @@ -689,9 +689,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets @@ -700,15 +700,15 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %vgpr_offset:vgpr_32 = COPY $vgpr0 %0:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -731,9 +731,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute @@ -742,9 +742,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute @@ -753,9 +753,9 @@ body: | ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute @@ -764,9 +764,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute @@ -775,15 +775,15 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %vgpr_offset:vgpr_32 = COPY $vgpr0 %0:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %vgpr_offset, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %vgpr_offset, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -805,9 +805,9 @@ body: | ; GFX803-NEXT: {{ $}} ; GFX803-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets @@ -815,9 +815,9 @@ body: | ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets @@ -825,9 +825,9 @@ body: | ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets @@ -836,9 +836,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets @@ -848,16 +848,16 @@ body: | ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[COPY]], implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %sgpr_offset:sreg_32 = COPY $sgpr8 %0:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -880,9 +880,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets @@ -891,9 +891,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX900-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets @@ -903,10 +903,10 @@ body: | ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY]], 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX942-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY1]], 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets @@ -915,9 +915,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets @@ -926,15 +926,15 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[S_MOV_B32_]], 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[S_MOV_B32_]], 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX12-NEXT: SI_RETURN %sgpr_offset:sreg_32 = COPY $sgpr8 %0:vgpr_32, dead %2:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -957,9 +957,9 @@ body: | ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX803-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX803-NEXT: SI_RETURN ; ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute @@ -968,9 +968,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX900-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX900-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute @@ -980,10 +980,10 @@ body: | ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset ; GFX942-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset ; GFX942-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY1]], 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute @@ -992,9 +992,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute @@ -1003,15 +1003,15 @@ body: | ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], %sgpr_offset, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX12-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], %sgpr_offset, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX12-NEXT: SI_RETURN %sgpr_offset:sreg_32 = COPY $sgpr8 %0:vgpr_32, dead %2:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir index 9183fe5..19ca463 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir @@ -20,16 +20,16 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, [[V_ADD_U32_e64_]], implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets ; GFX942: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets @@ -37,21 +37,21 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, [[V_ADD_U32_e64_]], implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets ; GFX12: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -72,16 +72,16 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, [[V_ADD_U32_e64_]], implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets ; GFX942: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets @@ -89,21 +89,21 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, [[V_ADD_U32_e64_]], implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets ; GFX12: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -124,16 +124,16 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets ; GFX942: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets @@ -141,21 +141,21 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -178,9 +178,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets @@ -188,9 +188,9 @@ body: | ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX942-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets @@ -199,9 +199,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX10-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets @@ -209,15 +209,15 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %vgpr_offset:vgpr_32 = COPY $vgpr0 %0:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -240,9 +240,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute @@ -250,9 +250,9 @@ body: | ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX942-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute @@ -261,9 +261,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX10-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute @@ -271,15 +271,15 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 ; GFX12-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %vgpr_offset:vgpr_32 = COPY $vgpr0 %0:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -301,9 +301,9 @@ body: | ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets @@ -311,9 +311,9 @@ body: | ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX942-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets @@ -322,9 +322,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX10-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets @@ -332,15 +332,15 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX12-NEXT: SI_RETURN %sgpr_offset:sreg_32 = COPY $sgpr8 %0:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -363,9 +363,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets @@ -373,9 +373,9 @@ body: | ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX942-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets @@ -384,9 +384,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets @@ -394,15 +394,15 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX12-NEXT: SI_RETURN %sgpr_offset:sreg_32 = COPY $sgpr8 %0:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -425,9 +425,9 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute @@ -435,9 +435,9 @@ body: | ; GFX942-NEXT: {{ $}} ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX942-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute @@ -446,9 +446,9 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute @@ -456,15 +456,15 @@ body: | ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX12-NEXT: SI_RETURN %sgpr_offset:sreg_32 = COPY $sgpr8 %0:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... @@ -486,16 +486,16 @@ body: | ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 1, implicit $exec - ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier ; GFX942: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 1, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 1, implicit $exec - ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier @@ -503,21 +503,21 @@ body: | ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]] - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[COPY]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 1, implicit $exec - ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX10-NEXT: SI_RETURN ; ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 1, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 1, implicit $exec - ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX12-NEXT: SI_RETURN %0:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, /*clamp*/1, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %0 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %0 %1:vgpr_32 = V_ADD_U32_e64 16, %stack.0, /*clamp*/1, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %1 SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir index 8f228b7..c117473 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir @@ -6429,7 +6429,7 @@ body: | ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %22, 2031625 /* reguse:VGPR_32 */, [[V_CVT_I32_F64_e32_4]] + ; GFX908-NEXT: INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %22, 1835017 /* reguse:VGPR_32 */, [[V_CVT_I32_F64_e32_4]] ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.1: ; GFX908-NEXT: successors: %bb.2(0x80000000) @@ -6478,7 +6478,7 @@ body: | %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 - INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %22:vgpr_32, 2031625 /* reguse:VGPR_32 */, %4 + INLINEASM &"v_or_b32 $0, 0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %22:vgpr_32, 1835017 /* reguse:VGPR_32 */, %4 %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir index c19d5a6..8b2c588 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir @@ -33,7 +33,7 @@ name: asm_write_vgpr_accvgpr_write_read body: | bb.0: - INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0 + INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0 $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ... @@ -47,7 +47,7 @@ name: asm_write_vgpr_accvgpr_write_read_partialnop body: | bb.0: - INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr0 + INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr0 S_NOP 0 $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ... @@ -60,7 +60,7 @@ name: asm_write_vgpr_accvgpr_write_read_otherreg body: | bb.0: liveins: $vgpr0 - INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def $vgpr1 + INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def $vgpr1 $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll index f88b1bf..95638c7 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -11,10 +11,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX908: bb.0 (%ir-block.0): ; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX908-NEXT: {{ $}} - ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef %6:agpr_32 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7929866 /* regdef:VReg_128 */, def %25 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, undef %6:agpr_32 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7667722 /* regdef:VReg_128 */, def %25 ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %25 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3735562 /* regdef:VReg_64 */, def %27 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3473418 /* regdef:VReg_64 */, def %27 ; REGALLOC-GFX908-NEXT: SI_SPILL_AV64_SAVE %27, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]] ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1) @@ -36,10 +36,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX908-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX908-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; PEI-GFX908-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 - ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef renamable $agpr0 - ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7929866 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 + ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, undef renamable $agpr0 + ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7667722 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3735562 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 + ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3473418 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 ; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec @@ -60,10 +60,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX90A: bb.0 (%ir-block.0): ; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX90A-NEXT: {{ $}} - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef %6:agpr_32 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8257546 /* regdef:VReg_128_Align2 */, def %23 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, undef %6:agpr_32 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7995402 /* regdef:VReg_128_Align2 */, def %23 ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %23 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:VReg_64_Align2 */, def %21 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3735562 /* regdef:VReg_64_Align2 */, def %21 ; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY %21 ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1) ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) @@ -79,10 +79,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX90A: bb.0 (%ir-block.0): ; PEI-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; PEI-GFX90A-NEXT: {{ $}} - ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef renamable $agpr0 - ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8257546 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 + ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, undef renamable $agpr0 + ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7995402 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:VReg_64_Align2 */, def renamable $vgpr2_vgpr3 + ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3735562 /* regdef:VReg_64_Align2 */, def renamable $vgpr2_vgpr3 ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1) ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir index 80afe7a..94aa6fe 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir @@ -43,17 +43,17 @@ machineFunctionInfo: body: | bb.0: - INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2424842 /* regdef:AGPR_32 */, implicit-def $agpr0 + INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:AGPR_32 */, implicit-def $agpr0 %14:vgpr_32 = COPY killed $agpr0 - INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 40042506 /* regdef:VReg_512 */, def %7, 19464202 /* regdef:VReg_256 */, def %8, 7929866 /* regdef:VReg_128 */, def %9, 5963786 /* regdef:VReg_96 */, def %10, 5963786 /* regdef:VReg_96 */, def %11 + INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 39780362 /* regdef:VReg_512 */, def %7, 19202058 /* regdef:VReg_256 */, def %8, 7667722 /* regdef:VReg_128 */, def %9, 5701642 /* regdef:VReg_96 */, def %10, 5701642 /* regdef:VReg_96 */, def %11 INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 40042505 /* reguse:VReg_512 */, %7 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 19464201 /* reguse:VReg_256 */, %8 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7929865 /* reguse:VReg_128 */, %9 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5963785 /* reguse:VReg_96 */, %10 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5963785 /* reguse:VReg_96 */, %11 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39780361 /* reguse:VReg_512 */, %7 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 19202057 /* reguse:VReg_256 */, %8 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7667721 /* reguse:VReg_128 */, %9 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:VReg_96 */, %10 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:VReg_96 */, %11 $agpr1 = COPY %14 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, killed $agpr1 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, killed $agpr1 SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir index e1148c4..35be513 100644 --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -73,7 +73,7 @@ body: | # (1) %0.sub0 + %0.sub0 and (2) %0.sub1 + %0.sub1 # Check that renaming (2) does not inadvertently rename (1). # CHECK-LABEL: name: test2 -# CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def undef %0.sub0, 2031626 /* regdef:VGPR_32 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) +# CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %0.sub0, 1835018 /* regdef:VGPR_32 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) name: test2 body: | bb.0: @@ -81,7 +81,7 @@ body: | bb.1: undef %0.sub1:vreg_64 = V_ALIGNBIT_B32_e64 %0.sub0:vreg_64, %0.sub0:vreg_64, 16, implicit $exec - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def undef %0.sub0:vreg_64, 2031626 /* regdef:VGPR_32 */, def %0.sub1:vreg_64, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0:vreg_64(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %0.sub1:vreg_64(tied-def 5) + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %0.sub0:vreg_64, 1835018 /* regdef:VGPR_32 */, def %0.sub1:vreg_64, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0:vreg_64(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %0.sub1:vreg_64(tied-def 5) S_BRANCH %bb.1 ... diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir index ad490f8..c58b9da 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir @@ -43,7 +43,7 @@ body: | ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3997705 /* reguse:VReg_64_Align2 */, [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3735561 /* reguse:VReg_64_Align2 */, [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]] ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 %1:av_64_align2 = COPY $vgpr0_vgpr1 @@ -51,7 +51,7 @@ body: | %3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1) %4:vreg_128_align2 = COPY %3 %5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3997705 /* reguse:VReg_64_Align2 */, %5 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3735561 /* reguse:VReg_64_Align2 */, %5 SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-insert-extract.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-insert-extract.mir index 0b4e662..bae84f4 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-insert-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-insert-extract.mir @@ -19,7 +19,7 @@ body: | ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2_sub3:areg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -30,7 +30,7 @@ body: | %4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec undef %5.sub0_sub1:areg_128_align2 = COPY %4 %5.sub2_sub3 = IMPLICIT_DEF - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %5 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %5 GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1) GLOBAL_STORE_DWORDX2 %0, %5.sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN @@ -172,7 +172,7 @@ body: | ; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub2_sub3:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3 ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2_sub3:areg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -183,7 +183,7 @@ body: | undef %4.sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec undef %5.sub0_sub1:areg_128_align2 = COPY %4.sub2_sub3 %5.sub2_sub3 = IMPLICIT_DEF - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %5 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %5 GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1) GLOBAL_STORE_DWORDX2 %0, %5.sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN @@ -208,7 +208,7 @@ body: | ; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2 ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2_sub3:areg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -219,7 +219,7 @@ body: | undef %4.sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec undef %5.sub1:areg_128_align2 = COPY %4.sub2 %5.sub2_sub3 = IMPLICIT_DEF - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %5 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %5 GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1) GLOBAL_STORE_DWORDX2 %0, %5.sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-src2-chain.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-src2-chain.mir index dcf3b8b..e9f45e3 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-src2-chain.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-src2-chain.mir @@ -17,7 +17,7 @@ body: | ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -26,7 +26,7 @@ body: | %3:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1) %4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec undef %5.sub0_sub1:areg_128_align2 = COPY %4 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %5 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %5 GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN ... @@ -47,7 +47,7 @@ body: | ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -56,7 +56,7 @@ body: | %3:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1) %4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3.sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec undef %5.sub0_sub1:areg_128_align2 = COPY %4 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %5 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %5 GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN ... @@ -79,7 +79,7 @@ body: | ; CHECK-NEXT: dead %other_use:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0_sub1 ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_1:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_1]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -90,7 +90,7 @@ body: | %other_use:vreg_64_align2 = COPY %4.sub0_sub1 %5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec %6:areg_64_align2 = COPY %5 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %6:areg_64_align2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %6:areg_64_align2 GLOBAL_STORE_DWORDX2 %0, %6, 0, 0, implicit $exec :: (store (s64), addrspace 1) SI_RETURN ... @@ -114,7 +114,7 @@ body: | ; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_1:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_2:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_2]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_64_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_64_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -126,7 +126,7 @@ body: | undef %5.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4, 0, 0, 0, implicit $mode, implicit $exec %6:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %5.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec %7:areg_64_align2 = COPY %6 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_64_Align2 */, %7 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_64_Align2 */, %7 GLOBAL_STORE_DWORDX2 %0, %7, 0, 0, implicit $exec :: (store (s64), addrspace 1) SI_RETURN @@ -151,7 +151,7 @@ body: | ; CHECK-NEXT: dead %other_use:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1 ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_2:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_2]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -163,7 +163,7 @@ body: | %other_use:vreg_64_align2 = COPY %5.sub0_sub1 %6:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %5.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec undef %8.sub0_sub1:areg_128_align2 = COPY %6 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %8:areg_128_align2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %8:areg_128_align2 GLOBAL_STORE_DWORDX4 %0, %8, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN @@ -189,7 +189,7 @@ body: | ; CHECK-NEXT: dead %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1 ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_2:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_e64_2]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -202,7 +202,7 @@ body: | %other_use1:vreg_64_align2 = COPY %5.sub0_sub1 %6:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %5.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec %8:agpr_32 = COPY %6 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, %8:agpr_32 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, %8:agpr_32 GLOBAL_STORE_DWORD %0, %8, 0, 0, implicit $exec :: (store (s32), addrspace 1) SI_RETURN @@ -231,7 +231,7 @@ body: | ; CHECK-NEXT: dead %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3 ; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]] - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -245,7 +245,7 @@ body: | %other_use1:vreg_64_align2 = COPY %4.sub2_sub3 %other_use2:vreg_64 = COPY %4.sub1_sub2 %6:areg_128_align2 = COPY %4 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9568265 /* reguse:AReg_128_Align2 */, %6:areg_128_align2 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9306121 /* reguse:AReg_128_Align2 */, %6:areg_128_align2 GLOBAL_STORE_DWORDX4 %0, %6, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN ... @@ -273,7 +273,7 @@ body: | ; CHECK-NEXT: %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3 ; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], %other_use1, 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -287,7 +287,7 @@ body: | %other_use1:vreg_64_align2 = COPY %4.sub2_sub3 %other_use2:vreg_64 = COPY %4.sub1_sub2 %6:areg_64 = COPY %4.sub1_sub2 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4325385 /* reguse:AReg_64 */, %6:areg_64 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4063241 /* reguse:AReg_64 */, %6:areg_64 GLOBAL_STORE_DWORDX2 %0, %other_use1, 0, 0, implicit $exec :: (store (s64), addrspace 1) SI_RETURN ... @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3 ; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2 - ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4325385 /* reguse:AReg_64 */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4063241 /* reguse:AReg_64 */, [[COPY3]] ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], %other_use1, 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 @@ -327,7 +327,7 @@ body: | %other_use1:vreg_64_align2 = COPY %4.sub2_sub3 %other_use2:vreg_64 = COPY %4.sub1_sub2 %6:areg_64 = COPY %4.sub1_sub2 - INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4325385 /* reguse:AReg_64 */, %6:areg_64 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4063241 /* reguse:AReg_64 */, %6:areg_64 GLOBAL_STORE_DWORDX2 %0, %other_use1, 0, 0, implicit $exec :: (store (s64), addrspace 1) SI_RETURN ... diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir index 3c7dd64..af882c0 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -37,7 +37,7 @@ body: | ; CHECK-NEXT: dead [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_512 = COPY [[COPY]].sub1 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def dead [[COPY1]], 2031626 /* regdef:VGPR_32 */, def dead [[COPY]].sub1, 2031625 /* reguse:VGPR_32 */, [[COPY1]], 2031625 /* reguse:VGPR_32 */, [[COPY]].sub1 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead [[COPY1]], 1835018 /* regdef:VGPR_32 */, def dead [[COPY]].sub1, 1835017 /* reguse:VGPR_32 */, [[COPY1]], 1835017 /* reguse:VGPR_32 */, [[COPY]].sub1 ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub3:vreg_512 = COPY [[COPY]].sub3 ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_1]] @@ -63,7 +63,7 @@ body: | undef %11.sub0:vreg_512 = COPY %4.sub0 %12:vgpr_32 = COPY %4.sub0 %11.sub1:vreg_512 = COPY %4.sub1 - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def dead %12:vgpr_32, 2031626 /* regdef:VGPR_32 */, def dead %4.sub1:vreg_512, 2031625 /* reguse:VGPR_32 */, %12:vgpr_32, 2031625 /* reguse:VGPR_32 */, %4.sub1:vreg_512 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead %12:vgpr_32, 1835018 /* regdef:VGPR_32 */, def dead %4.sub1:vreg_512, 1835017 /* reguse:VGPR_32 */, %12:vgpr_32, 1835017 /* reguse:VGPR_32 */, %4.sub1:vreg_512 %11.sub2:vreg_512 = COPY undef %1 %11.sub3:vreg_512 = COPY %4.sub3 %11.sub5:vreg_512 = COPY undef %1 diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir index 24df4b8..3e8e187 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -40,18 +40,18 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def dead %11 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead %11 ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; CHECK-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 3) - ; CHECK-NEXT: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %15, 2031626 /* regdef:VGPR_32 */, def %16 + ; CHECK-NEXT: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %15, 1835018 /* regdef:VGPR_32 */, def %16 ; CHECK-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK-NEXT: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_2]], 0, 0, implicit $exec ; CHECK-NEXT: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - ; CHECK-NEXT: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %21, 2031626 /* regdef:VGPR_32 */, def %22 + ; CHECK-NEXT: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %21, 1835018 /* regdef:VGPR_32 */, def %22 ; CHECK-NEXT: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_2]], 0, 0, implicit $exec - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def dead [[V_MOV_B32_e32_3]], 2031626 /* regdef:VGPR_32 */, def dead [[V_MOV_B32_e32_4]], 2031625 /* reguse:VGPR_32 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_3]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_4]](tied-def 5), 2031625 /* reguse:VGPR_32 */, %15, 2031625 /* reguse:VGPR_32 */, %16, 2031625 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_1]], 2031625 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_]], 2031625 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_3]], 2031625 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_2]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead [[V_MOV_B32_e32_3]], 1835018 /* regdef:VGPR_32 */, def dead [[V_MOV_B32_e32_4]], 1835017 /* reguse:VGPR_32 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_3]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_4]](tied-def 5), 1835017 /* reguse:VGPR_32 */, %15, 1835017 /* reguse:VGPR_32 */, %16, 1835017 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_1]], 1835017 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_]], 1835017 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_3]], 1835017 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_2]] ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_1]] ; CHECK-NEXT: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store (s32), addrspace 3) ; CHECK-NEXT: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store (s32), addrspace 3) @@ -94,21 +94,21 @@ body: | %10:vgpr_32 = IMPLICIT_DEF bb.1: - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %11:vgpr_32 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11:vgpr_32 GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1) %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 3) - INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %15:vgpr_32, 2031626 /* regdef:VGPR_32 */, def %16:vgpr_32 + INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %15:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %16:vgpr_32 %17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec %18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec %19:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %21:vgpr_32, 2031626 /* regdef:VGPR_32 */, def %22:vgpr_32 + INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %21:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %22:vgpr_32 %23:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %5.sub1:vreg_64 = COPY %6 %25:vgpr_32 = V_ADD_U32_e32 1, %10, implicit $exec %26:sreg_64_xexec = V_CMP_GT_U32_e64 64, %25, implicit $exec %27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def dead %24:vgpr_32, 2031626 /* regdef:VGPR_32 */, def dead %27:vgpr_32, 2031625 /* reguse:VGPR_32 */, %13.sub0:vreg_64, 2147483657 /* reguse tiedto:$0 */, %24:vgpr_32(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %27:vgpr_32(tied-def 5), 2031625 /* reguse:VGPR_32 */, %15, 2031625 /* reguse:VGPR_32 */, %16, 2031625 /* reguse:VGPR_32 */, %18, 2031625 /* reguse:VGPR_32 */, %17, 2031625 /* reguse:VGPR_32 */, %23, 2031625 /* reguse:VGPR_32 */, %19 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def dead %24:vgpr_32, 1835018 /* regdef:VGPR_32 */, def dead %27:vgpr_32, 1835017 /* reguse:VGPR_32 */, %13.sub0:vreg_64, 2147483657 /* reguse tiedto:$0 */, %24:vgpr_32(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %27:vgpr_32(tied-def 5), 1835017 /* reguse:VGPR_32 */, %15, 1835017 /* reguse:VGPR_32 */, %16, 1835017 /* reguse:VGPR_32 */, %18, 1835017 /* reguse:VGPR_32 */, %17, 1835017 /* reguse:VGPR_32 */, %23, 1835017 /* reguse:VGPR_32 */, %19 DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store (s32), addrspace 3) DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store (s32), addrspace 3) DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll index 4893bff..4286da3 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll @@ -12,10 +12,10 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 { ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec ; GCN-NEXT: [[AV_MOV_1:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[AV_MOV_]], [[AV_MOV_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def undef %14.sub0 + ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %14.sub0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %24:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1) - ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3735561 /* reguse:VReg_64 */, %14 + ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3473417 /* reguse:VReg_64 */, %14 ; GCN-NEXT: S_ENDPGM 0 %v0 = call i32 asm sideeffect "; def $0", "=v"() %tmp = insertelement <2 x i32> poison, i32 %v0, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir index b6d630e..da6b57c 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -28,9 +28,9 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (load (s32), addrspace 3) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def [[V_MOV_B32_e32_]], 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_]](tied-def 3) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_]] - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub0, 2031626 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub1 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def [[V_MOV_B32_e32_]], 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub0, 1835018 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub1 ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1 ; CHECK-NEXT: $sgpr10 = S_MOV_B32 -1 ; CHECK-NEXT: S_BRANCH %bb.1 @@ -41,9 +41,9 @@ body: | bb.1: %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load (s32), addrspace 3) - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %0, 2147483657 /* reguse tiedto:$0 */, %0(tied-def 3) - INLINEASM &"", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %2 - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def undef %0.sub0, 2031626 /* regdef:VGPR_32 */, def %0.sub1 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2147483657 /* reguse tiedto:$0 */, %0(tied-def 3) + INLINEASM &"", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %2 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %0.sub0, 1835018 /* regdef:VGPR_32 */, def %0.sub1 S_NOP 0, implicit %0.sub1 $sgpr10 = S_MOV_B32 -1 S_BRANCH %bb.1 @@ -69,9 +69,9 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (load (s32), addrspace 3) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def [[V_MOV_B32_e32_]], 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_]](tied-def 3) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_]] - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub1, 2031626 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub0 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def [[V_MOV_B32_e32_]], 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[DS_READ_B32_gfx9_]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub1, 1835018 /* regdef:VGPR_32 */, def undef [[V_MOV_B32_e32_]].sub0 ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1 ; CHECK-NEXT: $sgpr10 = S_MOV_B32 -1 ; CHECK-NEXT: S_BRANCH %bb.1 @@ -82,9 +82,9 @@ body: | bb.1: %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load (s32), addrspace 3) - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %0, 2147483657 /* reguse tiedto:$0 */, %0(tied-def 3) - INLINEASM &"", 1 /* sideeffect attdialect */, 2031625 /* reguse:VGPR_32 */, %2 - INLINEASM &"", 1 /* sideeffect attdialect */, 2031626 /* regdef:VGPR_32 */, def %0.sub1, 2031626 /* regdef:VGPR_32 */, def undef %0.sub0 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2147483657 /* reguse tiedto:$0 */, %0(tied-def 3) + INLINEASM &"", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, %2 + INLINEASM &"", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %0.sub1, 1835018 /* regdef:VGPR_32 */, def undef %0.sub0 S_NOP 0, implicit %0.sub1 $sgpr10 = S_MOV_B32 -1 S_BRANCH %bb.1 diff --git a/llvm/test/TableGen/ResolveSchedClass.td b/llvm/test/TableGen/ResolveSchedClass.td new file mode 100644 index 0000000..8c9ef1e --- /dev/null +++ b/llvm/test/TableGen/ResolveSchedClass.td @@ -0,0 +1,18 @@ +// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s -o - | FileCheck %s + +include "llvm/Target/Target.td" + +def TestTargetInstrInfo : InstrInfo; + +def TestTarget : Target { + let InstructionSet = TestTargetInstrInfo; +} + +// CHECK: unsigned resolveVariantSchedClassImpl(unsigned SchedClass, +// CHECK-NEXT: const MCInst *MI, const MCInstrInfo *MCII, const MCSubtargetInfo &STI, unsigned CPUID) + +// CHECK: unsigned resolveVariantSchedClass(unsigned SchedClass, +// CHECK-NEXT: const MCInst *MI, const MCInstrInfo *MCII, +// CHECK-NEXT: unsigned CPUID) const override { +// CHECK-NEXT: return TestTarget_MC::resolveVariantSchedClassImpl(SchedClass, MI, MCII, *this, CPUID); +// CHECK-NEXT: } diff --git a/llvm/test/ThinLTO/X86/memprof-dups.ll b/llvm/test/ThinLTO/X86/memprof-dups.ll new file mode 100644 index 0000000..8accc83 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-dups.ll @@ -0,0 +1,138 @@ +;; Check that duplicate spurious duplicate (identical) clones are simply +;; created as aliases to the first identical copy, rather than creating +;; multiple clones that call the same callee clones or have the same +;; allocation types. This currently happens in some cases due to additional +;; cloning performed during function assignment. +;; +;; The ThinLTO combined summary was manually modified as described there +;; to force multiple identical copies of various functions. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: llvm-as src.ll -o src.o +; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc +; RUN: opt -passes=memprof-context-disambiguation -stats \ +; RUN: -memprof-import-summary=src.o.thinlto.bc \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: src.o -S 2>&1 | FileCheck %s + +; CHECK: created clone bar.memprof.1 +;; Duplicates of bar are created as declarations since bar is available_externally, +;; and the compiler does not well support available_externally aliases. +; CHECK: created clone decl bar.memprof.2 +; CHECK: created clone decl bar.memprof.3 +; CHECK: created clone _Z3foov.memprof.1 +;; Duplicates of _Z3foov are created as aliases to the appropriate materialized +;; clone of _Z3foov. +; CHECK: created clone alias _Z3foov.memprof.2 +; CHECK: created clone alias _Z3foov.memprof.3 + +;--- src.ll +source_filename = "memprof-distrib-alias.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_Z8fooAliasv = alias ptr (...), ptr @_Z3foov + +;; Original alias is unchanged. +; CHECK: @_Z8fooAliasv = alias ptr (...), ptr @_Z3foov{{$}} +;; We create an equivalent alias for the cloned def @_Z3foov.memprof.1. +; CHECK: @_Z8fooAliasv.memprof.1 = alias ptr (...), ptr @_Z3foov.memprof.1 + +;; We should also create aliases for the duplicate clones of _Z3foov +;; (_Z3foov.memprof.2 and _Z3foov.memprof.3) to the versions they are duplicates +;; of, and ditto for the associated @_Z8fooAliasv clones. +;; +;; _Z3foov.memprof.2 is a duplicate of original _Z3foov, and thus so is _Z8fooAliasv.memprof.2 +; CHECK: @_Z3foov.memprof.2 = alias ptr (), ptr @_Z3foov{{$}} +; CHECK: @_Z8fooAliasv.memprof.2 = alias ptr (...), ptr @_Z3foov{{$}} +;; _Z3foov.memprof.3 is a duplicate of _Z3foov.memprof.1, and thus so is _Z8fooAliasv.memprof.3 +; CHECK: @_Z3foov.memprof.3 = alias ptr (), ptr @_Z3foov.memprof.1 +; CHECK: @_Z8fooAliasv.memprof.3 = alias ptr (...), ptr @_Z3foov.memprof.1 + +; CHECK-LABEL: define i32 @main() +define i32 @main() #0 { +entry: + ;; The first call to bar does not allocate cold memory. It should call + ;; the original function, which eventually calls the original allocation + ;; decorated with a "notcold" attribute. + ; CHECK: call {{.*}} @bar() + %call = call ptr @bar(), !callsite !0 + ;; The second call to bar allocates cold memory. It should call the cloned + ;; function which eventually calls a cloned allocation decorated with a + ;; "cold" attribute. + ; CHECK: call {{.*}} @bar.memprof.1() + %call1 = call ptr @bar(), !callsite !1 + ret i32 0 +} + +; CHECK-LABEL: define available_externally i32 @bar() +define available_externally i32 @bar() #0 { +entry: + ; CHECK: call {{.*}} @_Z8fooAliasv() + %call = call ptr @_Z8fooAliasv(), !callsite !8 + ret i32 0 +} + +declare ptr @_Znam(i64) + +; CHECK-LABEL: define ptr @_Z3foov() +define ptr @_Z3foov() #0 { +entry: + ; CHECK: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]] + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +; We create actual clone for bar.memprof.1. +; CHECK: define available_externally i32 @bar.memprof.1() +; CHECK: call {{.*}} @_Z3foov.memprof.1() + +;; bar.memprof.2 and bar.memprof.3 are duplicates (of original bar and +;; bar.memprof.1, respectively). However, they are available externally, +;; so rather than create an alias we simply create a declaration, since the +;; compiler does not fully support available_externally aliases. +; CHECK: declare i32 @bar.memprof.2 +; CHECK: declare i32 @bar.memprof.3 + +; We create actual clone for foo.memprof.1. +; CHECK: define {{.*}} @_Z3foov.memprof.1() +; CHECK: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]] + +; CHECK: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; CHECK: attributes #[[COLD]] = { "memprof"="cold" } + +; CHECK: 4 memprof-context-disambiguation - Number of function clone duplicates detected during ThinLTO backend +; CHECK: 2 memprof-context-disambiguation - Number of function clones created during ThinLTO backend + +attributes #0 = { noinline optnone } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 1234, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 1234, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 1234} + +;--- src.o.thinlto.ll +; ModuleID = 'src.o.thinlto.ll' +source_filename = "src.o.thinlto.bc" + +^0 = module: (path: "src.o", hash: (1720506022, 1575514144, 2506794664, 3599359797, 3160884478)) +^1 = gv: (guid: 6583049656999245004, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), aliasee: ^2))) +;; Summary for _Z3foov, where the allocs part has been manually modified to add +;; two additional clones that are the same as the prior versions: +;; ... allocs: ((versions: (notcold, cold, notcold, cold), ... +^2 = gv: (guid: 9191153033785521275, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (notcold, cold, notcold, cold), memProf: ((type: notcold, stackIds: (1234, 8632435727821051414)), (type: cold, stackIds: (1234, 15025054523792398438)))))))) +^3 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^4)), callsites: ((callee: ^4, clones: (0), stackIds: (8632435727821051414)), (callee: ^4, clones: (1), stackIds: (15025054523792398438)))))) +;; Summary for bar, where the callsites part has been manually modified to add +;; two additional clones that are the same as the prior clones: +;; ... callsites: ((callee: ^1, clones: (0, 1, 0, 1), ... +^4 = gv: (guid: 16434608426314478903, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0, 1, 0, 1), stackIds: (1234)))))) +^6 = flags: 353 +^7 = blockcount: 0 diff --git a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll index a6e254c..09784f8 100644 --- a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll +++ b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll @@ -63,14 +63,14 @@ ; CHECK: tail call void @_ZL9internal1v.llvm.3267420853450984672() ; CHECK: tail call void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() ; CHECK-LABEL: declare void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() -;; We should have 2 clones of src2.cc's internal1 function, calling a single -;; clone of external2. +;; We should have one clone of src2.cc's internal1 function, calling a single +;; clone of external2, and a second clone that was detected to be a duplicate +;; of the first that becomes a declaration (since this is available_externally - +;; in the module with the prevailing copy it would be an alias to clone 1). ; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.1() ; CHECK: tail call void @_Z9external2v.memprof.1() ; CHECK: tail call void @_Z9external2v.memprof.1() -; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() -; CHECK: tail call void @_Z9external2v.memprof.1() -; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK: declare void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() ; CHECK-NOT: memprof ;--- src1.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected index bc49669d..eaf9f52 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected @@ -7,10 +7,10 @@ define i64 @i64_test(i64 %i) nounwind readnone { ; CHECK-NEXT: t0: ch,glue = EntryToken ; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %8 ; CHECK-NEXT: t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %9 -; CHECK-NEXT: t50: i64 = REG_SEQUENCE # D:1 TargetConstant:i32<80>, t2, TargetConstant:i32<3>, t4, TargetConstant:i32<11> +; CHECK-NEXT: t50: i64 = REG_SEQUENCE # D:1 TargetConstant:i32<76>, t2, TargetConstant:i32<3>, t4, TargetConstant:i32<11> ; CHECK-NEXT: t27: i32,ch = BUFFER_LOAD_DWORD_OFFEN<Mem:(dereferenceable load (s32) from %ir.loc, align 8, addrspace 5)> TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 ; CHECK-NEXT: t30: i32,ch = BUFFER_LOAD_DWORD_OFFEN<Mem:(dereferenceable load (s32) from %ir.loc + 4, basealign 8, addrspace 5)> TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<4>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 -; CHECK-NEXT: t33: v2i32 = REG_SEQUENCE # D:1 TargetConstant:i32<80>, t27, TargetConstant:i32<3>, t30, TargetConstant:i32<11> +; CHECK-NEXT: t33: v2i32 = REG_SEQUENCE # D:1 TargetConstant:i32<76>, t27, TargetConstant:i32<3>, t30, TargetConstant:i32<11> ; CHECK-NEXT: t10: i64 = V_ADD_U64_PSEUDO # D:1 t50, t33 ; CHECK-NEXT: t24: i32 = EXTRACT_SUBREG # D:1 t10, TargetConstant:i32<3> ; CHECK-NEXT: t17: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t24 diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index d1dfb1d..25efa00 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -52,6 +52,7 @@ add_llvm_unittest(SupportTests IndexedAccessorTest.cpp InstructionCostTest.cpp InterleavedRangeTest.cpp + JobserverTest.cpp JSONTest.cpp KnownBitsTest.cpp LEB128Test.cpp diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp new file mode 100644 index 0000000..ddee023 --- /dev/null +++ b/llvm/unittests/Support/JobserverTest.cpp @@ -0,0 +1,442 @@ +//===- llvm/unittest/Support/JobserverTest.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Jobserver.h unit tests. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Jobserver.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include <future> +#include <random> +#include <stdlib.h> + +#if defined(LLVM_ON_UNIX) +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include <atomic> +#include <condition_variable> +#include <fcntl.h> +#include <mutex> +#include <sys/stat.h> +#include <thread> +#include <unistd.h> +#elif defined(_WIN32) +#include <windows.h> +#endif + +#define DEBUG_TYPE "jobserver-test" + +using namespace llvm; + +namespace { + +// RAII helper to set an environment variable for the duration of a test. +class ScopedEnvironment { + std::string Name; + std::string OldValue; + bool HadOldValue; + +public: + ScopedEnvironment(const char *Name, const char *Value) : Name(Name) { +#if defined(_WIN32) + char *Old = nullptr; + size_t OldLen; + errno_t err = _dupenv_s(&Old, &OldLen, Name); + if (err == 0 && Old != nullptr) { + HadOldValue = true; + OldValue = Old; + free(Old); + } else { + HadOldValue = false; + } + _putenv_s(Name, Value); +#else + const char *Old = getenv(Name); + if (Old) { + HadOldValue = true; + OldValue = Old; + } else { + HadOldValue = false; + } + setenv(Name, Value, 1); +#endif + } + + ~ScopedEnvironment() { +#if defined(_WIN32) + if (HadOldValue) + _putenv_s(Name.c_str(), OldValue.c_str()); + else + // On Windows, setting an environment variable to an empty string + // unsets it, making getenv() return NULL. + _putenv_s(Name.c_str(), ""); +#else + if (HadOldValue) + setenv(Name.c_str(), OldValue.c_str(), 1); + else + unsetenv(Name.c_str()); +#endif + } +}; + +TEST(Jobserver, Slot) { + // Default constructor creates an invalid slot. + JobSlot S1; + EXPECT_FALSE(S1.isValid()); + EXPECT_FALSE(S1.isImplicit()); + + // Create an implicit slot. + JobSlot S2 = JobSlot::createImplicit(); + EXPECT_TRUE(S2.isValid()); + EXPECT_TRUE(S2.isImplicit()); + + // Create an explicit slot. + JobSlot S3 = JobSlot::createExplicit(42); + EXPECT_TRUE(S3.isValid()); + EXPECT_FALSE(S3.isImplicit()); + + // Test move construction. + JobSlot S4 = std::move(S2); + EXPECT_TRUE(S4.isValid()); + EXPECT_TRUE(S4.isImplicit()); + EXPECT_FALSE(S2.isValid()); // S2 is now invalid. + + // Test move assignment. + S1 = std::move(S3); + EXPECT_TRUE(S1.isValid()); + EXPECT_FALSE(S1.isImplicit()); + EXPECT_FALSE(S3.isValid()); // S3 is now invalid. +} + +// Test fixture for parsing tests to ensure the singleton state is +// reset between each test case. +class JobserverParsingTest : public ::testing::Test { +protected: + void TearDown() override { JobserverClient::resetForTesting(); } +}; + +TEST_F(JobserverParsingTest, NoMakeflags) { + // No MAKEFLAGS, should be null. + ScopedEnvironment Env("MAKEFLAGS", ""); + // On Unix, setting an env var to "" makes getenv() return an empty + // string, not NULL. We must call unsetenv() to test the case where + // the variable is truly not present. +#if !defined(_WIN32) + unsetenv("MAKEFLAGS"); +#endif + EXPECT_EQ(JobserverClient::getInstance(), nullptr); +} + +TEST_F(JobserverParsingTest, EmptyMakeflags) { + // Empty MAKEFLAGS, should be null. + ScopedEnvironment Env("MAKEFLAGS", ""); + EXPECT_EQ(JobserverClient::getInstance(), nullptr); +} + +TEST_F(JobserverParsingTest, DryRunFlag) { + // Dry-run flag 'n', should be null. + ScopedEnvironment Env("MAKEFLAGS", "n -j --jobserver-auth=fifo:/tmp/foo"); + EXPECT_EQ(JobserverClient::getInstance(), nullptr); +} + +// Separate fixture for non-threaded client tests. +class JobserverClientTest : public JobserverParsingTest {}; + +#if defined(LLVM_ON_UNIX) +// RAII helper to create and clean up a temporary FIFO file. +class ScopedFifo { + SmallString<128> Path; + bool IsValid = false; + +public: + ScopedFifo() { + // To get a unique, non-colliding name for a FIFO, we use the + // createTemporaryFile function to reserve a name in the filesystem. + std::error_code EC = + sys::fs::createTemporaryFile("jobserver-test", "fifo", Path); + if (EC) + return; + // Then we immediately remove the regular file it created, but keep the + // unique path. + sys::fs::remove(Path); + // Finally, we create the FIFO at that safe, unique path. + if (mkfifo(Path.c_str(), 0600) != 0) + return; + IsValid = true; + } + + ~ScopedFifo() { + if (IsValid) + sys::fs::remove(Path); + } + + const char *c_str() const { return Path.data(); } + bool isValid() const { return IsValid; } +}; + +TEST_F(JobserverClientTest, UnixClientFifo) { + // This test covers basic FIFO client creation and behavior with an empty + // FIFO. No job tokens are available. + ScopedFifo F; + ASSERT_TRUE(F.isValid()); + + // Intentionally inserted \t in environment string. + std::string Makeflags = " \t -j4\t \t--jobserver-auth=fifo:"; + Makeflags += F.c_str(); + ScopedEnvironment Env("MAKEFLAGS", Makeflags.c_str()); + + JobserverClient *Client = JobserverClient::getInstance(); + ASSERT_NE(Client, nullptr); + + // Get the implicit token. + JobSlot S1 = Client->tryAcquire(); + EXPECT_TRUE(S1.isValid()); + EXPECT_TRUE(S1.isImplicit()); + + // FIFO is empty, next acquire fails. + JobSlot S2 = Client->tryAcquire(); + EXPECT_FALSE(S2.isValid()); + + // Release does not write to the pipe for the implicit token. + Client->release(std::move(S1)); + + // Re-acquire the implicit token. + S1 = Client->tryAcquire(); + EXPECT_TRUE(S1.isValid()); +} + +#if LLVM_ENABLE_THREADS +// Test fixture for tests that use the jobserver strategy. It creates a +// temporary FIFO, sets MAKEFLAGS, and provides a helper to pre-load the FIFO +// with job tokens, simulating `make -jN`. +class JobserverStrategyTest : public JobserverParsingTest { +protected: + std::unique_ptr<ScopedFifo> TheFifo; + std::thread MakeThread; + std::atomic<bool> StopMakeThread{false}; + // Save and restore the global parallel strategy to avoid interfering with + // other tests in the same process. + ThreadPoolStrategy SavedStrategy; + + void SetUp() override { + SavedStrategy = parallel::strategy; + TheFifo = std::make_unique<ScopedFifo>(); + ASSERT_TRUE(TheFifo->isValid()); + + std::string MakeFlags = "--jobserver-auth=fifo:"; + MakeFlags += TheFifo->c_str(); + setenv("MAKEFLAGS", MakeFlags.c_str(), 1); + } + + void TearDown() override { + if (MakeThread.joinable()) { + StopMakeThread = true; + MakeThread.join(); + } + unsetenv("MAKEFLAGS"); + TheFifo.reset(); + // Restore the original strategy to ensure subsequent tests are unaffected. + parallel::strategy = SavedStrategy; + } + + // Starts a background thread that emulates `make`. It populates the FIFO + // with initial tokens and then recycles tokens released by clients. + void startMakeProxy(int NumInitialJobs) { + MakeThread = std::thread([this, NumInitialJobs]() { + LLVM_DEBUG(dbgs() << "[MakeProxy] Thread started.\n"); + // Open the FIFO for reading and writing. This call does not block. + int RWFd = open(TheFifo->c_str(), O_RDWR); + LLVM_DEBUG(dbgs() << "[MakeProxy] Opened FIFO " << TheFifo->c_str() + << " with O_RDWR, FD=" << RWFd << "\n"); + if (RWFd == -1) { + LLVM_DEBUG( + dbgs() + << "[MakeProxy] ERROR: Failed to open FIFO with O_RDWR. Errno: " + << errno << "\n"); + return; + } + + // Populate with initial jobs. + LLVM_DEBUG(dbgs() << "[MakeProxy] Writing " << NumInitialJobs + << " initial tokens.\n"); + for (int i = 0; i < NumInitialJobs; ++i) { + if (write(RWFd, "+", 1) != 1) { + LLVM_DEBUG(dbgs() + << "[MakeProxy] ERROR: Failed to write initial token " << i + << ".\n"); + close(RWFd); + return; + } + } + LLVM_DEBUG(dbgs() << "[MakeProxy] Finished writing initial tokens.\n"); + + // Make the read non-blocking so we can periodically check StopMakeThread. + int flags = fcntl(RWFd, F_GETFL, 0); + fcntl(RWFd, F_SETFL, flags | O_NONBLOCK); + + while (!StopMakeThread) { + char Token; + ssize_t Ret = read(RWFd, &Token, 1); + if (Ret == 1) { + LLVM_DEBUG(dbgs() << "[MakeProxy] Read token '" << Token + << "' to recycle.\n"); + // A client released a token, 'make' makes it available again. + std::this_thread::sleep_for(std::chrono::microseconds(100)); + ssize_t WRet; + do { + WRet = write(RWFd, &Token, 1); + } while (WRet < 0 && errno == EINTR); + if (WRet <= 0) { + LLVM_DEBUG( + dbgs() + << "[MakeProxy] ERROR: Failed to write recycled token.\n"); + break; // Error, stop the proxy. + } + LLVM_DEBUG(dbgs() + << "[MakeProxy] Wrote token '" << Token << "' back.\n"); + } else if (Ret < 0 && errno != EAGAIN && errno != EWOULDBLOCK) { + LLVM_DEBUG(dbgs() << "[MakeProxy] ERROR: Read failed with errno " + << errno << ".\n"); + break; // Error, stop the proxy. + } + // Yield to prevent this thread from busy-waiting. + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + LLVM_DEBUG(dbgs() << "[MakeProxy] Thread stopping.\n"); + close(RWFd); + }); + + // Give the proxy thread a moment to start and populate the FIFO. + // This is a simple way to avoid a race condition where the client starts + // before the initial tokens are in the pipe. + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } +}; + +TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) { + // This test simulates `make -j3`. We will have 1 implicit job slot and + // we will add 2 explicit job tokens to the FIFO, for a total of 3. + const int NumExplicitJobs = 2; + const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 for the implicit slot + const int NumTasks = 8; // More tasks than available slots. + + LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs + << " jobs.\n"); + startMakeProxy(NumExplicitJobs); + LLVM_DEBUG(dbgs() << "MakeProxy is running.\n"); + + // Create the thread pool. Its constructor will call jobserver_concurrency() + // and create a client that reads from our pre-loaded FIFO. + StdThreadPool Pool(jobserver_concurrency()); + + std::atomic<int> ActiveTasks{0}; + std::atomic<int> MaxActiveTasks{0}; + std::atomic<int> CompletedTasks{0}; + std::mutex M; + std::condition_variable CV; + + // Dispatch more tasks than there are job slots. The pool should block + // and only run up to `ConcurrencyLimit` tasks at once. + for (int i = 0; i < NumTasks; ++i) { + Pool.async([&, i] { + // Track the number of concurrently running tasks. + int CurrentActive = ++ActiveTasks; + LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive + << "\n"); + int OldMax = MaxActiveTasks.load(); + while (CurrentActive > OldMax) + MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); + + std::this_thread::sleep_for(std::chrono::milliseconds(25)); + + --ActiveTasks; + if (++CompletedTasks == NumTasks) { + std::lock_guard<std::mutex> Lock(M); + CV.notify_one(); + } + }); + } + + // Wait for all tasks to complete. + std::unique_lock<std::mutex> Lock(M); + CV.wait(Lock, [&] { return CompletedTasks == NumTasks; }); + + LLVM_DEBUG(dbgs() << "Test finished. Max active tasks was " << MaxActiveTasks + << ".\n"); + // The key assertion: the maximum number of concurrent tasks should + // not have exceeded the limit imposed by the jobserver. + EXPECT_LE(MaxActiveTasks, ConcurrencyLimit); + EXPECT_EQ(CompletedTasks, NumTasks); +} + +TEST_F(JobserverStrategyTest, ParallelForIsLimited) { + // This test verifies that llvm::parallelFor respects the jobserver limit. + const int NumExplicitJobs = 3; + const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit + const int NumTasks = 20; + + LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs + << " jobs.\n"); + startMakeProxy(NumExplicitJobs); + LLVM_DEBUG(dbgs() << "MakeProxy is running.\n"); + + // Set the global strategy. parallelFor will use this. + parallel::strategy = jobserver_concurrency(); + + std::atomic<int> ActiveTasks{0}; + std::atomic<int> MaxActiveTasks{0}; + + parallelFor(0, NumTasks, [&](int i) { + int CurrentActive = ++ActiveTasks; + LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive + << "\n"); + int OldMax = MaxActiveTasks.load(); + while (CurrentActive > OldMax) + MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); + + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + --ActiveTasks; + }); + + LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was " + << MaxActiveTasks << ".\n"); + EXPECT_LE(MaxActiveTasks, ConcurrencyLimit); +} + +TEST_F(JobserverStrategyTest, ParallelSortIsLimited) { + // This test serves as an integration test to ensure parallelSort completes + // correctly when running under the jobserver strategy. It doesn't directly + // measure concurrency but verifies correctness. + const int NumExplicitJobs = 3; + startMakeProxy(NumExplicitJobs); + + parallel::strategy = jobserver_concurrency(); + + std::vector<int> V(1024); + // Fill with random data + std::mt19937 randEngine; + std::uniform_int_distribution<int> dist; + for (int &i : V) + i = dist(randEngine); + + parallelSort(V.begin(), V.end()); + ASSERT_TRUE(llvm::is_sorted(V)); +} + +#endif // LLVM_ENABLE_THREADS + +#endif // defined(LLVM_ON_UNIX) + +} // end anonymous namespace diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 0f42d49..b0a309c 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1761,7 +1761,7 @@ void SubtargetEmitter::emitSchedModelHelpers(const std::string &ClassName, << "\n::resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI," << " const MCInstrInfo *MCII, unsigned CPUID) const {\n" << " return " << Target << "_MC" - << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n" + << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, *this, CPUID);\n" << "} // " << ClassName << "::resolveVariantSchedClass\n\n"; STIPredicateExpander PE(Target, /*Indent=*/0); @@ -1923,7 +1923,8 @@ void SubtargetEmitter::parseFeaturesFunction(raw_ostream &OS) { void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { OS << "namespace " << Target << "_MC {\n" << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,\n" - << " const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID) {\n"; + << " const MCInst *MI, const MCInstrInfo *MCII, " + << "const MCSubtargetInfo &STI, unsigned CPUID) {\n"; emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true); OS << "}\n"; OS << "} // end namespace " << Target << "_MC\n\n"; @@ -1945,7 +1946,7 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { << " const MCInst *MI, const MCInstrInfo *MCII,\n" << " unsigned CPUID) const override {\n" << " return " << Target << "_MC" - << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n"; + << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, *this, CPUID);\n"; OS << " }\n"; if (TGT.getHwModes().getNumModeIds() > 1) { OS << " unsigned getHwModeSet() const override;\n"; @@ -2073,7 +2074,8 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "class DFAPacketizer;\n"; OS << "namespace " << Target << "_MC {\n" << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass," - << " const MCInst *MI, const MCInstrInfo *MCII, unsigned CPUID);\n" + << " const MCInst *MI, const MCInstrInfo *MCII, " + << "const MCSubtargetInfo &STI, unsigned CPUID);\n" << "} // end namespace " << Target << "_MC\n\n"; OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n" << " explicit " << ClassName << "(const Triple &TT, StringRef CPU, " diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 6ca766ca..38ba466 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -103,6 +103,7 @@ static_library("Support") { "IntEqClasses.cpp", "IntervalMap.cpp", "JSON.cpp", + "Jobserver.cpp", "KnownBits.cpp", "KnownFPClass.cpp", "LEB128.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 42c1a15..a25f058 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -56,6 +56,7 @@ unittest("SupportTests") { "InstructionCostTest.cpp", "InterleavedRangeTest.cpp", "JSONTest.cpp", + "JobserverTest.cpp", "KnownBitsTest.cpp", "LEB128Test.cpp", "LineIteratorTest.cpp", diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.F90 index dac0de6..4ca9f14 100644 --- a/openmp/runtime/test/transform/tile/intfor.f90 +++ b/openmp/runtime/test/transform/tile/intfor.F90 @@ -10,6 +10,7 @@ ! RUN: %t-ub18.exe | FileCheck %s --match-full-lines program tile_intfor_1d + implicit none integer i print *, 'do' |