diff options
77 files changed, 664 insertions, 311 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 88aef89..50d4c03 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -671,9 +671,12 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, return RValue::get(results[0]); } - case cir::TEK_Complex: - cgm.errorNYI(loc, "unsupported evaluation kind of function call result"); - return getUndefRValue(retTy); + case cir::TEK_Complex: { + mlir::ResultRange results = theCall->getOpResults(); + assert(!results.empty() && + "Expected at least one result for complex rvalue"); + return RValue::getComplex(results[0]); + } } llvm_unreachable("Invalid evaluation kind"); } diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h index 9acf8b1..61a09a5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCleanup.h +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h @@ -15,6 +15,7 @@ #define CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H #include "Address.h" +#include "CIRGenModule.h" #include "EHScopeStack.h" #include "mlir/IR/Value.h" @@ -257,5 +258,53 @@ inline void EHScopeStack::popCatch() { deallocate(EHCatchScope::getSizeForNumHandlers(scope.getNumHandlers())); } +/// The exceptions personality for a function. +struct EHPersonality { + const char *personalityFn = nullptr; + + // If this is non-null, this personality requires a non-standard + // function for rethrowing an exception after a catchall cleanup. + // This function must have prototype void(void*). + const char *catchallRethrowFn = nullptr; + + static const EHPersonality &get(CIRGenModule &cgm, + const clang::FunctionDecl *fd); + static const EHPersonality &get(CIRGenFunction &cgf); + + static const EHPersonality GNU_C; + static const EHPersonality GNU_C_SJLJ; + static const EHPersonality GNU_C_SEH; + static const EHPersonality GNU_ObjC; + static const EHPersonality GNU_ObjC_SJLJ; + static const EHPersonality GNU_ObjC_SEH; + static const EHPersonality GNUstep_ObjC; + static const EHPersonality GNU_ObjCXX; + static const EHPersonality NeXT_ObjC; + static const EHPersonality GNU_CPlusPlus; + static const EHPersonality GNU_CPlusPlus_SJLJ; + static const EHPersonality GNU_CPlusPlus_SEH; + static const EHPersonality MSVC_except_handler; + static const EHPersonality MSVC_C_specific_handler; + static const EHPersonality MSVC_CxxFrameHandler3; + static const EHPersonality GNU_Wasm_CPlusPlus; + static const EHPersonality XL_CPlusPlus; + static const EHPersonality ZOS_CPlusPlus; + + /// Does this personality use landingpads or the family of pad instructions + /// designed to form funclets? + bool usesFuncletPads() const { + return isMSVCPersonality() || isWasmPersonality(); + } + + bool isMSVCPersonality() const { + return this == &MSVC_except_handler || this == &MSVC_C_specific_handler || + this == &MSVC_CxxFrameHandler3; + } + + bool isWasmPersonality() const { return this == &GNU_Wasm_CPlusPlus; } + + bool isMSVCXXPersonality() const { return this == &MSVC_CxxFrameHandler3; } +}; + } // namespace clang::CIRGen #endif // CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp index 717a3e0..67f46ff 100644 --- a/clang/lib/CIR/CodeGen/CIRGenException.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp @@ -18,6 +18,171 @@ using namespace clang; using namespace clang::CIRGen; +const EHPersonality EHPersonality::GNU_C = {"__gcc_personality_v0", nullptr}; +const EHPersonality EHPersonality::GNU_C_SJLJ = {"__gcc_personality_sj0", + nullptr}; +const EHPersonality EHPersonality::GNU_C_SEH = {"__gcc_personality_seh0", + nullptr}; +const EHPersonality EHPersonality::NeXT_ObjC = {"__objc_personality_v0", + nullptr}; +const EHPersonality EHPersonality::GNU_CPlusPlus = {"__gxx_personality_v0", + nullptr}; +const EHPersonality EHPersonality::GNU_CPlusPlus_SJLJ = { + "__gxx_personality_sj0", nullptr}; +const EHPersonality EHPersonality::GNU_CPlusPlus_SEH = { + "__gxx_personality_seh0", nullptr}; +const EHPersonality EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", + "objc_exception_throw"}; +const EHPersonality EHPersonality::GNU_ObjC_SJLJ = { + "__gnu_objc_personality_sj0", "objc_exception_throw"}; +const EHPersonality EHPersonality::GNU_ObjC_SEH = { + "__gnu_objc_personality_seh0", "objc_exception_throw"}; +const EHPersonality EHPersonality::GNU_ObjCXX = { + "__gnustep_objcxx_personality_v0", nullptr}; +const EHPersonality EHPersonality::GNUstep_ObjC = { + "__gnustep_objc_personality_v0", nullptr}; +const EHPersonality EHPersonality::MSVC_except_handler = {"_except_handler3", + nullptr}; +const EHPersonality EHPersonality::MSVC_C_specific_handler = { + "__C_specific_handler", nullptr}; +const EHPersonality EHPersonality::MSVC_CxxFrameHandler3 = { + "__CxxFrameHandler3", nullptr}; +const EHPersonality EHPersonality::GNU_Wasm_CPlusPlus = { + "__gxx_wasm_personality_v0", nullptr}; +const EHPersonality EHPersonality::XL_CPlusPlus = {"__xlcxx_personality_v1", + nullptr}; +const EHPersonality EHPersonality::ZOS_CPlusPlus = {"__zos_cxx_personality_v2", + nullptr}; + +static const EHPersonality &getCPersonality(const TargetInfo &target, + const CodeGenOptions &cgOpts) { + const llvm::Triple &triple = target.getTriple(); + if (triple.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + if (cgOpts.hasSjLjExceptions()) + return EHPersonality::GNU_C_SJLJ; + if (cgOpts.hasDWARFExceptions()) + return EHPersonality::GNU_C; + if (cgOpts.hasSEHExceptions()) + return EHPersonality::GNU_C_SEH; + return EHPersonality::GNU_C; +} + +static const EHPersonality &getObjCPersonality(const TargetInfo &target, + const LangOptions &langOpts, + const CodeGenOptions &cgOpts) { + const llvm::Triple &triple = target.getTriple(); + if (triple.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + + switch (langOpts.ObjCRuntime.getKind()) { + case ObjCRuntime::FragileMacOSX: + return getCPersonality(target, cgOpts); + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + case ObjCRuntime::WatchOS: + return EHPersonality::NeXT_ObjC; + case ObjCRuntime::GNUstep: + if (langOpts.ObjCRuntime.getVersion() >= VersionTuple(1, 7)) + return EHPersonality::GNUstep_ObjC; + [[fallthrough]]; + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: + if (cgOpts.hasSjLjExceptions()) + return EHPersonality::GNU_ObjC_SJLJ; + if (cgOpts.hasSEHExceptions()) + return EHPersonality::GNU_ObjC_SEH; + return EHPersonality::GNU_ObjC; + } + llvm_unreachable("bad runtime kind"); +} + +static const EHPersonality &getCXXPersonality(const TargetInfo &target, + const CodeGenOptions &cgOpts) { + const llvm::Triple &triple = target.getTriple(); + if (triple.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + if (triple.isOSAIX()) + return EHPersonality::XL_CPlusPlus; + if (cgOpts.hasSjLjExceptions()) + return EHPersonality::GNU_CPlusPlus_SJLJ; + if (cgOpts.hasDWARFExceptions()) + return EHPersonality::GNU_CPlusPlus; + if (cgOpts.hasSEHExceptions()) + return EHPersonality::GNU_CPlusPlus_SEH; + if (cgOpts.hasWasmExceptions()) + return EHPersonality::GNU_Wasm_CPlusPlus; + return EHPersonality::GNU_CPlusPlus; +} + +/// Determines the personality function to use when both C++ +/// and Objective-C exceptions are being caught. +static const EHPersonality &getObjCXXPersonality(const TargetInfo &target, + const LangOptions &langOpts, + const CodeGenOptions &cgOpts) { + if (target.getTriple().isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + + switch (langOpts.ObjCRuntime.getKind()) { + // In the fragile ABI, just use C++ exception handling and hope + // they're not doing crazy exception mixing. + case ObjCRuntime::FragileMacOSX: + return getCXXPersonality(target, cgOpts); + + // The ObjC personality defers to the C++ personality for non-ObjC + // handlers. Unlike the C++ case, we use the same personality + // function on targets using (backend-driven) SJLJ EH. + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + case ObjCRuntime::WatchOS: + return getObjCPersonality(target, langOpts, cgOpts); + + case ObjCRuntime::GNUstep: + return EHPersonality::GNU_ObjCXX; + + // The GCC runtime's personality function inherently doesn't support + // mixed EH. Use the ObjC personality just to avoid returning null. + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: + return getObjCPersonality(target, langOpts, cgOpts); + } + llvm_unreachable("bad runtime kind"); +} + +static const EHPersonality &getSEHPersonalityMSVC(const llvm::Triple &triple) { + return triple.getArch() == llvm::Triple::x86 + ? EHPersonality::MSVC_except_handler + : EHPersonality::MSVC_C_specific_handler; +} + +const EHPersonality &EHPersonality::get(CIRGenModule &cgm, + const FunctionDecl *fd) { + const llvm::Triple &triple = cgm.getTarget().getTriple(); + const LangOptions &langOpts = cgm.getLangOpts(); + const CodeGenOptions &cgOpts = cgm.getCodeGenOpts(); + const TargetInfo &target = cgm.getTarget(); + + // Functions using SEH get an SEH personality. + if (fd && fd->usesSEHTry()) + return getSEHPersonalityMSVC(triple); + + if (langOpts.ObjC) { + return langOpts.CPlusPlus ? getObjCXXPersonality(target, langOpts, cgOpts) + : getObjCPersonality(target, langOpts, cgOpts); + } + return langOpts.CPlusPlus ? getCXXPersonality(target, cgOpts) + : getCPersonality(target, cgOpts); +} + +const EHPersonality &EHPersonality::get(CIRGenFunction &cgf) { + const auto *fg = cgf.curCodeDecl; + // For outlined finallys and filters, use the SEH personality in case they + // contain more SEH. This mostly only affects finallys. Filters could + // hypothetically use gnu statement expressions to sneak in nested SEH. + fg = fg ? fg : cgf.curSEHParent.getDecl(); + return get(cgf.cgm, dyn_cast_or_null<FunctionDecl>(fg)); +} + void CIRGenFunction::emitCXXThrowExpr(const CXXThrowExpr *e) { const llvm::Triple &triple = getTarget().getTriple(); if (cgm.getLangOpts().OpenMPIsTargetDevice && diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index d791130..c3fcd1a6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -120,6 +120,8 @@ public: /// Tracks function scope overall cleanup handling. EHScopeStack ehStack; + GlobalDecl curSEHParent; + llvm::DenseMap<const clang::ValueDecl *, clang::FieldDecl *> lambdaCaptureFields; clang::FieldDecl *lambdaThisCaptureField = nullptr; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a8a9c51..1d0dfd0b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3791,18 +3791,12 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, if (Current.is(TT_FunctionDeclarationName)) return true; - if (!Current.Tok.getIdentifierInfo()) + if (Current.isNoneOf(tok::identifier, tok::kw_operator)) return false; const auto *Prev = Current.getPreviousNonComment(); assert(Prev); - if (Prev->is(tok::coloncolon)) - Prev = Prev->Previous; - - if (!Prev) - return false; - const auto &Previous = *Prev; if (const auto *PrevPrev = Previous.getPreviousNonComment(); @@ -3851,6 +3845,8 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, // Find parentheses of parameter list. if (Current.is(tok::kw_operator)) { + if (Line.startsWith(tok::kw_friend)) + return true; if (Previous.Tok.getIdentifierInfo() && Previous.isNoneOf(tok::kw_return, tok::kw_co_return)) { return true; diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index 4e89af4..3fb78dc 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -1468,3 +1468,30 @@ void calling_function_with_default_arg() { // OGCG: store float 0x40019999A0000000, ptr %[[DEFAULT_ARG_IMAG_PTR]], align 4 // OGCG: %[[TMP_DEFAULT_ARG:.*]] = load <2 x float>, ptr %[[DEFAULT_ARG_ADDR]], align 4 // OGCG: call void @_Z33function_with_complex_default_argCf(<2 x float> {{.*}} %[[TMP_DEFAULT_ARG]]) + +void calling_function_that_return_complex() { + float _Complex a = complex_type_return_type(); +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a", init] +// CIR: %[[RESULT:.*]] = cir.call @_Z24complex_type_return_typev() : () -> !cir.complex<!cir.float> +// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> + +// TODO(CIR): the difference between the CIR LLVM and OGCG is because the lack of calling convention lowering, + +// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[RESULT:.*]] = call { float, float } @_Z24complex_type_return_typev() +// LLVM: store { float, float } %[[RESULT]], ptr %[[A_ADDR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[RESULT_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[RESULT:.*]] = call noundef <2 x float> @_Z24complex_type_return_typev() +// OGCG: store <2 x float> %[[RESULT]], ptr %[[RESULT_ADDR]], align 4 +// OGCG: %[[RESULT_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT_ADDR]], i32 0, i32 0 +// OGCG: %[[RESULT_REAL:.*]] = load float, ptr %[[RESULT_REAL_PTR]], align 4 +// OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT_ADDR]], i32 0, i32 1 +// OGCG: %[[RESULT_IMAG:.*]] = load float, ptr %[[RESULT_IMAG_PTR]], align 4 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: store float %[[RESULT_REAL]], ptr %[[A_REAL_PTR]], align 4 +// OGCG: store float %[[RESULT_IMAG]], ptr %[[A_IMAG_PTR]], align 4 diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index f363738..ca99940 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1129,6 +1129,11 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) { ASSERT_EQ(Tokens.size(), 7u) << Tokens; // Not TT_FunctionDeclarationName. EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown); + + Tokens = annotate("SomeAPI::operator()();"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + // Not TT_FunctionDeclarationName. + EXPECT_TOKEN(Tokens[2], tok::kw_operator, TT_Unknown); } TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) { diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 9fddd47..a852555 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -610,14 +610,6 @@ LLVM_ABI ModulePass *createCheckDebugMachineModulePass(); /// caller saved registers with stack slots. LLVM_ABI extern char &FixupStatepointCallerSavedID; -/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics -/// or split the data to two <128 x i32>. -LLVM_ABI FunctionPass *createX86LowerAMXTypePass(); - -/// The pass transforms amx intrinsics to scalar operation if the function has -/// optnone attribute or it is O0. -LLVM_ABI FunctionPass *createX86LowerAMXIntrinsicsPass(); - /// When learning an eviction policy, extract score(reward) information, /// otherwise this does nothing LLVM_ABI FunctionPass *createRegAllocScoringPass(); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index f05febf..24a0cb7 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1768,7 +1768,7 @@ private: // FIXME: We should be able to derive FailedSymsForQuery from each query once // we fix how the detach operation works. struct EmitQueries { - JITDylib::AsynchronousSymbolQuerySet Updated; + JITDylib::AsynchronousSymbolQuerySet Completed; JITDylib::AsynchronousSymbolQuerySet Failed; DenseMap<AsynchronousSymbolQuery *, std::shared_ptr<SymbolDependenceMap>> FailedSymsForQuery; diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index ead7655..a235975 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -13,7 +13,10 @@ #define LLVM_SUPPORT_SPECIALCASELIST_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/RadixTree.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/GlobPattern.h" @@ -162,6 +165,10 @@ private: }; std::vector<GlobMatcher::Glob> Globs; + + RadixTree<iterator_range<StringRef::const_iterator>, + SmallVector<const GlobMatcher::Glob *, 1>> + PrefixToGlob; }; /// Represents a set of patterns and their line numbers diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9e78ec9..8ea1326 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4030,7 +4030,6 @@ bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { /// if it is simplified. class SimplificationTracker { DenseMap<Value *, Value *> Storage; - const SimplifyQuery &SQ; // Tracks newly created Phi nodes. The elements are iterated by insertion // order. PhiNodeSet AllPhiNodes; @@ -4038,8 +4037,6 @@ class SimplificationTracker { SmallPtrSet<SelectInst *, 32> AllSelectNodes; public: - SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} - Value *Get(Value *V) { do { auto SV = Storage.find(V); @@ -4049,30 +4046,6 @@ public: } while (true); } - Value *Simplify(Value *Val) { - SmallVector<Value *, 32> WorkList; - SmallPtrSet<Value *, 32> Visited; - WorkList.push_back(Val); - while (!WorkList.empty()) { - auto *P = WorkList.pop_back_val(); - if (!Visited.insert(P).second) - continue; - if (auto *PI = dyn_cast<Instruction>(P)) - if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) { - for (auto *U : PI->users()) - WorkList.push_back(cast<Value>(U)); - Put(PI, V); - PI->replaceAllUsesWith(V); - if (auto *PHI = dyn_cast<PHINode>(PI)) - AllPhiNodes.erase(PHI); - if (auto *Select = dyn_cast<SelectInst>(PI)) - AllSelectNodes.erase(Select); - PI->eraseFromParent(); - } - } - return Get(Val); - } - void Put(Value *From, Value *To) { Storage.insert({From, To}); } void ReplacePhi(PHINode *From, PHINode *To) { @@ -4133,8 +4106,7 @@ private: /// Common Type for all different fields in addressing modes. Type *CommonType = nullptr; - /// SimplifyQuery for simplifyInstruction utility. - const SimplifyQuery &SQ; + const DataLayout &DL; /// Original Address. Value *Original; @@ -4143,8 +4115,8 @@ private: Value *CommonValue = nullptr; public: - AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) - : SQ(_SQ), Original(OriginalValue) {} + AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue) + : DL(DL), Original(OriginalValue) {} ~AddressingModeCombiner() { eraseCommonValueIfDead(); } @@ -4256,7 +4228,7 @@ private: // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. SmallVector<Value *, 2> NullValue; - Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); + Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); for (auto &AM : AddrModes) { Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { @@ -4306,7 +4278,7 @@ private: // simplification is possible only if original phi/selects were not // simplified yet. // Using this mapping we can find the current value in AddrToBase. - SimplificationTracker ST(SQ); + SimplificationTracker ST; // First step, DFS to create PHI nodes for all intermediate blocks. // Also fill traverse order for the second step. @@ -4465,7 +4437,6 @@ private: PHI->addIncoming(ST.Get(Map[PV]), B); } } - Map[Current] = ST.Simplify(V); } } @@ -5856,8 +5827,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the graph are compatible. bool PhiOrSelectSeen = false; SmallVector<Instruction *, 16> AddrModeInsts; - const SimplifyQuery SQ(*DL, TLInfo); - AddressingModeCombiner AddrModes(SQ, Addr); + AddressingModeCombiner AddrModes(*DL, Addr); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 8d413a3..d029ac5 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -2901,13 +2901,23 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, for (auto &SN : ER.Ready) IL_collectQueries( - EQ.Updated, SN->defs(), + EQ.Completed, SN->defs(), [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); }, [](AsynchronousSymbolQuery &Q, JITDylib &JD, NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) { Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol()); }); + // std::erase_if is not available in C++17, and llvm::erase_if does not work + // here. + for (auto it = EQ.Completed.begin(), end = EQ.Completed.end(); it != end;) { + if ((*it)->isComplete()) { + ++it; + } else { + it = EQ.Completed.erase(it); + } + } + #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_emit"); #endif @@ -3043,9 +3053,8 @@ Error ExecutionSession::OL_notifyEmitted( } } - for (auto &UQ : EmitQueries->Updated) - if (UQ->isComplete()) - UQ->handleComplete(*this); + for (auto &UQ : EmitQueries->Completed) + UQ->handleComplete(*this); // If there are any bad dependencies then return an error. if (!BadDeps.empty()) { diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index f74e52a..c27f627 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -89,14 +89,32 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { return A.Name.size() < B.Name.size(); }); } + + for (const auto &G : reverse(Globs)) { + StringRef Prefix = G.Pattern.prefix(); + + auto &V = PrefixToGlob.emplace(Prefix).first->second; + V.emplace_back(&G); + } } void SpecialCaseList::GlobMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { - for (const auto &G : reverse(Globs)) - if (G.Pattern.match(Query)) - return Cb(G.Name, G.LineNo); + if (!PrefixToGlob.empty()) { + for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) { + for (const auto *G : V) { + if (G->Pattern.match(Query)) { + Cb(G->Name, G->LineNo); + // As soon as we find a match in the vector, we can break for this + // vector, since the globs are already sorted by priority within the + // prefix group. However, we continue searching other prefix groups in + // the map, as they may contain a better match overall. + break; + } + } + } + } } SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d5117da..457e540 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5151,7 +5151,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // GPR32 zeroing if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) { - if (Subtarget.hasZeroCycleZeroingGPR32()) { + if (Subtarget.hasZeroCycleZeroingGPR64() && + !Subtarget.hasZeroCycleZeroingGPR32()) { + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + assert(DestRegX.isValid() && "Destination super-reg not valid"); + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (Subtarget.hasZeroCycleZeroingGPR32()) { BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 6261fad..706ab2b 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -160,6 +160,14 @@ FunctionPass *createX86PartialReductionPass(); /// // Analyzes and emits pseudos to support Win x64 Unwind V2. FunctionPass *createX86WinEHUnwindV2Pass(); +/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics +/// or split the data to two <128 x i32>. +FunctionPass *createX86LowerAMXTypePass(); + +/// The pass transforms amx intrinsics to scalar operation if the function has +/// optnone attribute or it is O0. +FunctionPass *createX86LowerAMXIntrinsicsPass(); + InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &, const X86RegisterBankInfo &); diff --git a/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir b/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir index f34d3ed..6b2a31b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir +++ b/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir @@ -35,7 +35,7 @@ body: | ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-LABEL: name: f0 ; CHECK-NOZCZ-GPR32-ZCZ-GPR64: liveins: $x0, $lr ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: {{ $}} - ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: $w0 = ORRWrr $wzr, $wzr + ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: $x0 = MOVZXi 0, 0 ; CHECK-NOZCZ-GPR32-ZCZ-GPR64-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 ; ; CHECK-ZCZ-GPR32-ZCZ-GPR64-LABEL: name: f0 diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll index dc64306..0f284aa 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll @@ -1,41 +1,44 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR32-NOZCZ-GPR64 ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gpr32 | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32 -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gpr64 | FileCheck %s -check-prefixes=ALL,ZCZ-GPR64 -; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gpr64 | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR32-ZCZ-GPR64 +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR32-NOZCZ-GPR64 ; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64 ; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64 -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR32-NOZCZ-GPR64 ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=kryo | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64 ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64 define i8 @ti8() { entry: ; ALL-LABEL: ti8: -; NOZCZ-GPR: mov w0, wzr +; NOZCZ-GPR32-NOZCZ-GPR64: mov w0, wzr ; ZCZ-GPR32: mov w0, #0 +; NOZCZ-GPR32-ZCZ-GPR64: mov x0, #0 ret i8 0 } define i16 @ti16() { entry: ; ALL-LABEL: ti16: -; NOZCZ-GPR: mov w0, wzr +; NOZCZ-GPR32-NOZCZ-GPR64: mov w0, wzr ; ZCZ-GPR32: mov w0, #0 +; NOZCZ-GPR32-ZCZ-GPR64: mov x0, #0 ret i16 0 } define i32 @ti32() { entry: ; ALL-LABEL: ti32: -; NOZCZ-GPR: mov w0, wzr +; NOZCZ-GPR32-NOZCZ-GPR64: mov w0, wzr ; ZCZ-GPR32: mov w0, #0 +; NOZCZ-GPR32-ZCZ-GPR64: mov x0, #0 ret i32 0 } define i64 @ti64() { entry: ; ALL-LABEL: ti64: -; NOZCZ-GPR: mov x0, xzr +; NOZCZ-GPR32-NOZCZ-GPR64 mov x0, xzr ; ZCZ-GPR64: mov x0, #0 ret i64 0 } diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll index b5bdf84..9630dab 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll @@ -31,4 +31,4 @@ if.end: ; preds = %entry, %if.then } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll index a5c1cec0..d3d2e8b 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll @@ -80,6 +80,6 @@ entry: ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll b/llvm/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll index bc6f2c5..e685465 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll @@ -17,5 +17,5 @@ entry: } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/logopm.ll b/llvm/test/CodeGen/Mips/Fast-ISel/logopm.ll index 90db1fd..f3b902b 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/logopm.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/logopm.ll @@ -590,8 +590,8 @@ entry: ret void } -attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #2 = { nounwind } !llvm.module.flags = !{!0} diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll index d1a0574..eca0d16 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll @@ -51,4 +51,4 @@ entry: ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/simplestorei.ll b/llvm/test/CodeGen/Mips/Fast-ISel/simplestorei.ll index ee174dd..33b4ef8 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/simplestorei.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/simplestorei.ll @@ -63,6 +63,6 @@ entry: ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/beqzc.ll b/llvm/test/CodeGen/Mips/beqzc.ll index 28f3f8c..42eb392 100644 --- a/llvm/test/CodeGen/Mips/beqzc.ll +++ b/llvm/test/CodeGen/Mips/beqzc.ll @@ -14,7 +14,7 @@ entry: ret i32 0 } -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/beqzc1.ll b/llvm/test/CodeGen/Mips/beqzc1.ll index 915f34e..01bb5f1 100644 --- a/llvm/test/CodeGen/Mips/beqzc1.ll +++ b/llvm/test/CodeGen/Mips/beqzc1.ll @@ -19,6 +19,6 @@ if.end: ; preds = %if.then, %entry ret i32 0 } -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/brsize3.ll b/llvm/test/CodeGen/Mips/brsize3.ll index 1aea201..20aab184 100644 --- a/llvm/test/CodeGen/Mips/brsize3.ll +++ b/llvm/test/CodeGen/Mips/brsize3.ll @@ -33,7 +33,7 @@ x: ; preds = %x, %entry } -attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #1 = { nounwind } !1 = !{i32 45} diff --git a/llvm/test/CodeGen/Mips/brsize3a.ll b/llvm/test/CodeGen/Mips/brsize3a.ll index de866f2..b1ebbd8 100644 --- a/llvm/test/CodeGen/Mips/brsize3a.ll +++ b/llvm/test/CodeGen/Mips/brsize3a.ll @@ -20,7 +20,7 @@ x: ; preds = %x, %entry } -attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #1 = { nounwind } !1 = !{i32 45} diff --git a/llvm/test/CodeGen/Mips/ci2.ll b/llvm/test/CodeGen/Mips/ci2.ll index a949729..4901d8d 100644 --- a/llvm/test/CodeGen/Mips/ci2.ll +++ b/llvm/test/CodeGen/Mips/ci2.ll @@ -33,7 +33,7 @@ if.end: ; preds = %if.else, %if.then ; constisle .4byte 305419896 # 0x12345678 } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } !1 = !{i32 103} diff --git a/llvm/test/CodeGen/Mips/cmplarge.ll b/llvm/test/CodeGen/Mips/cmplarge.ll index db7f37a..bfb6080 100644 --- a/llvm/test/CodeGen/Mips/cmplarge.ll +++ b/llvm/test/CodeGen/Mips/cmplarge.ll @@ -33,6 +33,6 @@ for.end: ; preds = %for.body, %entry ; cmp16: .end getSubImagesLuma declare i32 @iClip3(...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/const1.ll b/llvm/test/CodeGen/Mips/const1.ll index 750912d..7915d66 100644 --- a/llvm/test/CodeGen/Mips/const1.ll +++ b/llvm/test/CodeGen/Mips/const1.ll @@ -28,7 +28,7 @@ entry: ; CHECK: .4byte 262991277 } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } !llvm.ident = !{!0} diff --git a/llvm/test/CodeGen/Mips/const4a.ll b/llvm/test/CodeGen/Mips/const4a.ll index 245abbf..e88ffd3 100644 --- a/llvm/test/CodeGen/Mips/const4a.ll +++ b/llvm/test/CodeGen/Mips/const4a.ll @@ -172,8 +172,8 @@ declare void @goo(...) #1 declare void @hoo(...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } !llvm.ident = !{!0} diff --git a/llvm/test/CodeGen/Mips/const6.ll b/llvm/test/CodeGen/Mips/const6.ll index f40eeef..480a958 100644 --- a/llvm/test/CodeGen/Mips/const6.ll +++ b/llvm/test/CodeGen/Mips/const6.ll @@ -154,8 +154,8 @@ entry: declare void @hoo(...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } !llvm.ident = !{!0} diff --git a/llvm/test/CodeGen/Mips/const6a.ll b/llvm/test/CodeGen/Mips/const6a.ll index 720edd3a..eb62e27 100644 --- a/llvm/test/CodeGen/Mips/const6a.ll +++ b/llvm/test/CodeGen/Mips/const6a.ll @@ -23,7 +23,7 @@ entry: ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #1 = { nounwind } !1 = !{i32 121} diff --git a/llvm/test/CodeGen/Mips/ctlz.ll b/llvm/test/CodeGen/Mips/ctlz.ll index 3cc1569..49eb36f 100644 --- a/llvm/test/CodeGen/Mips/ctlz.ll +++ b/llvm/test/CodeGen/Mips/ctlz.ll @@ -22,6 +22,6 @@ declare i32 @llvm.ctlz.i32(i32, i1) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Mips/delay-slot-fill-forward.ll b/llvm/test/CodeGen/Mips/delay-slot-fill-forward.ll index 7c41641..43fd36b 100644 --- a/llvm/test/CodeGen/Mips/delay-slot-fill-forward.ll +++ b/llvm/test/CodeGen/Mips/delay-slot-fill-forward.ll @@ -161,7 +161,7 @@ if.end461: ; preds = %if.end436, %for.bod ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mips32r2" "target-features"="+mips32r2,+nooddspreg,+fpxx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mips32r2" "target-features"="+mips32r2,+nooddspreg,+fpxx" "use-soft-float"="false" } attributes #1 = { nounwind } !llvm.ident = !{!0} diff --git a/llvm/test/CodeGen/Mips/f16abs.ll b/llvm/test/CodeGen/Mips/f16abs.ll index 23bf402..242d8ff 100644 --- a/llvm/test/CodeGen/Mips/f16abs.ll +++ b/llvm/test/CodeGen/Mips/f16abs.ll @@ -29,8 +29,8 @@ declare double @fabs(double) #1 declare float @fabsf(float) #1 -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } -attributes #1 = { nounwind optsize readnone "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } +attributes #1 = { nounwind optsize readnone "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #2 = { nounwind optsize readnone } diff --git a/llvm/test/CodeGen/Mips/fp16instrinsmc.ll b/llvm/test/CodeGen/Mips/fp16instrinsmc.ll index 6c29c08..1582605 100644 --- a/llvm/test/CodeGen/Mips/fp16instrinsmc.ll +++ b/llvm/test/CodeGen/Mips/fp16instrinsmc.ll @@ -385,7 +385,7 @@ entry: ; Function Attrs: nounwind declare double @exp2(double) #0 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/fpneeded.ll b/llvm/test/CodeGen/Mips/fpneeded.ll index cc82f81..babfcad 100644 --- a/llvm/test/CodeGen/Mips/fpneeded.ll +++ b/llvm/test/CodeGen/Mips/fpneeded.ll @@ -131,7 +131,7 @@ entry: ; 32: .set reorder ; 32: .end foo3 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } define void @vv() #0 { entry: diff --git a/llvm/test/CodeGen/Mips/fpnotneeded.ll b/llvm/test/CodeGen/Mips/fpnotneeded.ll index 761ef30..2b98f7e 100644 --- a/llvm/test/CodeGen/Mips/fpnotneeded.ll +++ b/llvm/test/CodeGen/Mips/fpnotneeded.ll @@ -61,7 +61,7 @@ entry: ; cisle: .end foo -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } define float @fv() #0 { diff --git a/llvm/test/CodeGen/Mips/hf16call32.ll b/llvm/test/CodeGen/Mips/hf16call32.ll index e187b76..33353b6 100644 --- a/llvm/test/CodeGen/Mips/hf16call32.ll +++ b/llvm/test/CodeGen/Mips/hf16call32.ll @@ -1026,5 +1026,5 @@ declare { double, double } @dc_sf(float) #1 ; stel: jr $18 ; stel: .end __call_stub_fp_dc_sf -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/hf16call32_body.ll b/llvm/test/CodeGen/Mips/hf16call32_body.ll index 3bcb6f6..2eea4c3 100644 --- a/llvm/test/CodeGen/Mips/hf16call32_body.ll +++ b/llvm/test/CodeGen/Mips/hf16call32_body.ll @@ -303,4 +303,4 @@ entry: ; stel: $__fn_local_sf_df_df = sf_df_df ; stel: .end __fn_stub_sf_df_df -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/hfptrcall.ll b/llvm/test/CodeGen/Mips/hfptrcall.ll index 920c694..2babc67 100644 --- a/llvm/test/CodeGen/Mips/hfptrcall.ll +++ b/llvm/test/CodeGen/Mips/hfptrcall.ll @@ -118,8 +118,8 @@ entry: declare i32 @printf(ptr, ...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/l3mc.ll b/llvm/test/CodeGen/Mips/l3mc.ll index 440da3a..dc68eaf 100644 --- a/llvm/test/CodeGen/Mips/l3mc.ll +++ b/llvm/test/CodeGen/Mips/l3mc.ll @@ -99,7 +99,7 @@ entry: ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } ; __call_stub_fp___fixunsdfsi: __call_stub_fp___fixunsdfsi: ; __call_stub_fp___floatdidf: __call_stub_fp___floatdidf: diff --git a/llvm/test/CodeGen/Mips/lcb2.ll b/llvm/test/CodeGen/Mips/lcb2.ll index 036de38..79f4b43 100644 --- a/llvm/test/CodeGen/Mips/lcb2.ll +++ b/llvm/test/CodeGen/Mips/lcb2.ll @@ -115,7 +115,7 @@ if.end: ; preds = %if.then, %entry ; lcb: .end btz ; lcbn: .end btz -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } !llvm.ident = !{!0} diff --git a/llvm/test/CodeGen/Mips/lcb3c.ll b/llvm/test/CodeGen/Mips/lcb3c.ll index 40912f3..dd88924 100644 --- a/llvm/test/CodeGen/Mips/lcb3c.ll +++ b/llvm/test/CodeGen/Mips/lcb3c.ll @@ -51,7 +51,7 @@ if.end: ; preds = %if.else, %if.then ; lcb: jal $BB1_2 # branch ; lcb: $BB1_1: # %if.then -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/lcb4a.ll b/llvm/test/CodeGen/Mips/lcb4a.ll index a0258b1..ad843bb 100644 --- a/llvm/test/CodeGen/Mips/lcb4a.ll +++ b/llvm/test/CodeGen/Mips/lcb4a.ll @@ -55,7 +55,7 @@ if.end: ; preds = %if.else, %if.then ; ci: nop ; ci: $BB1_1: # %if.else -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll index 22baeba..0d479ff 100644 --- a/llvm/test/CodeGen/Mips/lcb5.ll +++ b/llvm/test/CodeGen/Mips/lcb5.ll @@ -216,7 +216,7 @@ if.end: ; preds = %if.then, %entry ; ci: .p2align 2 ; ci: .end z4 -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/mbrsize4a.ll b/llvm/test/CodeGen/Mips/mbrsize4a.ll index b8d2e2d..e6c620a 100644 --- a/llvm/test/CodeGen/Mips/mbrsize4a.ll +++ b/llvm/test/CodeGen/Mips/mbrsize4a.ll @@ -30,8 +30,8 @@ declare i32 @foo(...) #1 declare i32 @printf(ptr, ...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #2 = { nounwind } !1 = !{i32 68} diff --git a/llvm/test/CodeGen/Mips/micromips-attr.ll b/llvm/test/CodeGen/Mips/micromips-attr.ll index 8e70cc6..1915f3b 100644 --- a/llvm/test/CodeGen/Mips/micromips-attr.ll +++ b/llvm/test/CodeGen/Mips/micromips-attr.ll @@ -24,7 +24,7 @@ attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" - "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" + "stack-protector-buffer-size"="8" "use-soft-float"="false" } @@ -34,6 +34,6 @@ attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" - "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" + "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16-hf-attr-2.ll b/llvm/test/CodeGen/Mips/mips16-hf-attr-2.ll index 80294b5..eaa39e9 100644 --- a/llvm/test/CodeGen/Mips/mips16-hf-attr-2.ll +++ b/llvm/test/CodeGen/Mips/mips16-hf-attr-2.ll @@ -28,18 +28,18 @@ attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" - "unsafe-fp-math"="false" "use-soft-float"="false" + "use-soft-float"="false" } attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" - "unsafe-fp-math"="false" "use-soft-float"="true" + "use-soft-float"="true" } attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" - "unsafe-fp-math"="false" "use-soft-float"="true" + "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/mips16-hf-attr.ll b/llvm/test/CodeGen/Mips/mips16-hf-attr.ll index c8af712..cafa2d5 100644 --- a/llvm/test/CodeGen/Mips/mips16-hf-attr.ll +++ b/llvm/test/CodeGen/Mips/mips16-hf-attr.ll @@ -28,18 +28,18 @@ attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" - "unsafe-fp-math"="false" "use-soft-float"="false" + "use-soft-float"="false" } attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" - "unsafe-fp-math"="false" "use-soft-float"="true" + "use-soft-float"="true" } attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" - "unsafe-fp-math"="false" "use-soft-float"="true" + "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_1.ll b/llvm/test/CodeGen/Mips/mips16_32_1.ll index 0d02022..963fb58 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_1.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_1.ll @@ -10,4 +10,4 @@ entry: ; CHECK: .ent foo ; CHECK: jrc $ra ; CHECK: .end foo -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_10.ll b/llvm/test/CodeGen/Mips/mips16_32_10.ll index 86378ff..e0d6859 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_10.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_10.ll @@ -53,6 +53,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_3.ll b/llvm/test/CodeGen/Mips/mips16_32_3.ll index ee33abc..dc2fe29 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_3.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_3.ll @@ -62,6 +62,6 @@ entry: ; 32: .set reorder ; 32: .end main -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_4.ll b/llvm/test/CodeGen/Mips/mips16_32_4.ll index da926342..2fed74d 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_4.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_4.ll @@ -56,6 +56,6 @@ entry: ; 32: .end main -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_5.ll b/llvm/test/CodeGen/Mips/mips16_32_5.ll index 6692460..2bbe778 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_5.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_5.ll @@ -73,6 +73,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_6.ll b/llvm/test/CodeGen/Mips/mips16_32_6.ll index 5a464a2..0503b3f 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_6.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_6.ll @@ -80,6 +80,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_7.ll b/llvm/test/CodeGen/Mips/mips16_32_7.ll index 236f791..2b2dd8b 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_7.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_7.ll @@ -68,6 +68,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_8.ll b/llvm/test/CodeGen/Mips/mips16_32_8.ll index 5c0cd32..1aff91c 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_8.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_8.ll @@ -67,7 +67,7 @@ entry: ; 32: .set reorder ; 32: .end main -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "use-soft-float"="false" } +attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/mips16_32_9.ll b/llvm/test/CodeGen/Mips/mips16_32_9.ll index 609f054..82d7727 100644 --- a/llvm/test/CodeGen/Mips/mips16_32_9.ll +++ b/llvm/test/CodeGen/Mips/mips16_32_9.ll @@ -44,6 +44,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/nomips16.ll b/llvm/test/CodeGen/Mips/nomips16.ll index 62564f9..6b51eb9 100644 --- a/llvm/test/CodeGen/Mips/nomips16.ll +++ b/llvm/test/CodeGen/Mips/nomips16.ll @@ -33,6 +33,6 @@ entry: ; CHECK: .end nofoo -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/pbqp-reserved-physreg.ll b/llvm/test/CodeGen/Mips/pbqp-reserved-physreg.ll index 63a730c..a8eab07 100644 --- a/llvm/test/CodeGen/Mips/pbqp-reserved-physreg.ll +++ b/llvm/test/CodeGen/Mips/pbqp-reserved-physreg.ll @@ -31,5 +31,5 @@ bb35: ; preds = %bb unreachable } -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/powif64_16.ll b/llvm/test/CodeGen/Mips/powif64_16.ll index 3443b62..914ef94 100644 --- a/llvm/test/CodeGen/Mips/powif64_16.ll +++ b/llvm/test/CodeGen/Mips/powif64_16.ll @@ -17,7 +17,7 @@ define double @foo_pow_f64(double %y, i32 %p) { ret double %1 } -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } attributes #1 = { nounwind readonly } !0 = !{!"double", !1} diff --git a/llvm/test/CodeGen/Mips/s2rem.ll b/llvm/test/CodeGen/Mips/s2rem.ll index fdf06ce..5d324cb 100644 --- a/llvm/test/CodeGen/Mips/s2rem.ll +++ b/llvm/test/CodeGen/Mips/s2rem.ll @@ -86,7 +86,7 @@ entry: declare void @vf(float) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/sel1c.ll b/llvm/test/CodeGen/Mips/sel1c.ll index 071f988..2aaf56d 100644 --- a/llvm/test/CodeGen/Mips/sel1c.ll +++ b/llvm/test/CodeGen/Mips/sel1c.ll @@ -16,6 +16,6 @@ entry: ; cond-b-short: bteqz $BB0_{{[0-9]+}} # 16 bit inst } -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/sel2c.ll b/llvm/test/CodeGen/Mips/sel2c.ll index 0c3b957..44de4ac9 100644 --- a/llvm/test/CodeGen/Mips/sel2c.ll +++ b/llvm/test/CodeGen/Mips/sel2c.ll @@ -16,6 +16,6 @@ entry: ret void } -attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/simplebr.ll b/llvm/test/CodeGen/Mips/simplebr.ll index cfe547f..ae09d85 100644 --- a/llvm/test/CodeGen/Mips/simplebr.ll +++ b/llvm/test/CodeGen/Mips/simplebr.ll @@ -31,7 +31,7 @@ declare void @goo(...) #1 declare void @hoo(...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Mips/sr1.ll b/llvm/test/CodeGen/Mips/sr1.ll index c6fa9fc..6c42d45 100644 --- a/llvm/test/CodeGen/Mips/sr1.ll +++ b/llvm/test/CodeGen/Mips/sr1.ll @@ -50,7 +50,7 @@ entry: declare float @xf() #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Mips/tnaked.ll b/llvm/test/CodeGen/Mips/tnaked.ll index ac54f2f..287c009 100644 --- a/llvm/test/CodeGen/Mips/tnaked.ll +++ b/llvm/test/CodeGen/Mips/tnaked.ll @@ -25,5 +25,5 @@ entry: ; CHECK: .fmask 0x00000000,0 ; CHECK: addiu $sp, $sp, -8 -attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll index 63fd184..2570b3b 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll @@ -951,3 +951,41 @@ fallthrough: %v = add i32 %v1, %v2 ret i32 %v } + +; Make sure we don't simplify an incomplete expression tree. +define i8 @pr163453(ptr %p, i1 %cond) { +; CHECK-LABEL: define i8 @pr163453( +; CHECK-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P_ADDR_0:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], ptr [[P_ADDR_0]], ptr [[INCDEC_PTR11]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[SPEC_SELECT]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + br label %for.cond + +for.cond: + %p.pn = phi ptr [ %p, %entry ], [ %p.addr.0, %for.inc ] + %p.addr.0 = getelementptr i8, ptr %p.pn, i64 1 + br i1 false, label %exit, label %for.body + +for.body: + %1 = load i8, ptr %p.pn, align 1 + br i1 false, label %for.inc, label %if.else + +if.else: + %incdec.ptr11 = getelementptr i8, ptr %p.pn, i64 2 + %spec.select = select i1 %cond, ptr %p.addr.0, ptr %incdec.ptr11 + br label %exit + +for.inc: + br label %for.cond + +exit: + %p.addr.3 = phi ptr [ %spec.select, %if.else ], [ %p.addr.0, %for.cond ] + %load = load i8, ptr %p.addr.3, align 1 + ret i8 %load +} diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index d74abc2..37db096 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -923,10 +923,10 @@ def AMDGPU_MFMAOp : AMDGPU_Op<"mfma", [AllTypesMatch<["destC", "destD"]>, Pure]>, Arguments<(ins - I32Attr:$m, - I32Attr:$n, - I32Attr:$k, - I32Attr:$blocks, + ConfinedAttr<I32Attr, [IntIsOneOf<[4, 16, 32]>]>:$m, + ConfinedAttr<I32Attr, [IntIsOneOf<[4, 16, 32]>]>:$n, + ConfinedAttr<I32Attr, [IntIsOneOf<[1, 2, 4, 8, 16, 32, 64, 128]>]>:$k, + DefaultValuedAttr<ConfinedAttr<I32Attr, [IntIsOneOf<[1, 2, 4, 16]>]>, "1">:$blocks, MFMAInTypes:$sourceA, MFMAInTypes:$sourceB, MFMAOutTypes:$destC, @@ -969,14 +969,16 @@ def AMDGPU_MFMAOp : Example: ```mlir - %0 = amdgpu.mfma %matA * %matB + %matC - { abid = 1 : i32, cbsz = 1 : i32, - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32 } + %0 = amdgpu.mfma 16x16x16 %matA * %matB + %matC + : vector<4xf16>, vector<4xf16>, vector<4xf32> + + %1 = amdgpu.mfma 32x32x1 %matD * %matE + %matF + { abid = 1 : i32, cbsz = 1 : i32, blocks = 2 : i32 } blgp = bcast_second_32 : f32, f32, vector<32xf32> ``` }]; let assemblyFormat = [{ - $sourceA `*` $sourceB `+` $destC + custom<MNKDimensionList>($m, $n, $k) $sourceA `*` $sourceB `+` $destC attr-dict `blgp` `=` $blgp `:` type($sourceA) `,` type($sourceB) `,` type($destC) @@ -1109,9 +1111,9 @@ def AMDGPU_ScaledMFMAOp : AMDGPU_Op<"scaled_mfma", [AllTypesMatch<["destC", "destD"]>, Pure]>, Arguments<(ins - I32Attr:$m, - I32Attr:$n, - I32Attr:$k, + ConfinedAttr<I32Attr, [IntIsOneOf<[16, 32]>]>:$m, + ConfinedAttr<I32Attr, [IntIsOneOf<[16, 32]>]>:$n, + ConfinedAttr<I32Attr, [IntIsOneOf<[64, 128]>]>:$k, ScaledMFMAInTypes:$sourceA, ScaledMFMAInTypes:$sourceB, ScaledMFMAOutTypes:$destC, @@ -1124,8 +1126,8 @@ def AMDGPU_ScaledMFMAOp : let summary = "MLIR wrapper for CDNA scaled mfma instructions"; let description = [{ The `amdgpu.scaled_mfma` op is an MLIR wrapper around intrinsics - for various scaled versions of `mfma` instructions in the CDNA architecture, which perform - multiple outer products in order to allow fast matrix multiplication. + for various scaled versions of `mfma` instructions in the CDNA architecture, which + perform multiple outer products in order to allow fast matrix multiplication. The wrapper will select an appropriate `mfma` instruction, if one is available, based on the provided `m`, `k`, `n`, and `nBlks` attributes, along with the @@ -1140,15 +1142,23 @@ def AMDGPU_ScaledMFMAOp : This wrapper takes inspiration from `amdgpu.mfma`, but has some key differences: - `amdgpu.scaled_mfma` operates on fp4 (f4E2M1FN), fp6 (f6E2M3FN and f6E3M2FN) and - fp8 (f8E4M3FN and f8E5M2) types using either M=N=16, K=128 or M=N=32, K=64 as their tile - size. + fp8 (f8E4M3FN and f8E5M2) types using either M=N=16, K=128 or M=N=32, K=64 as + their tile size. - `amdgpu.scaled_mfma` does not support broadcasting. So, `cbsz`, `abid`, and `blgp` - are omitted from this wrapper. - - The `negateA`, `negateB`, and `negateC` flags in `amdgpu.mfma` are only supported for - double-precision operations on gfx94x and so are not included here. + are omitted from this wrapper. + - The `negateA`, `negateB`, and `negateC` flags in `amdgpu.mfma` are only supported + for double-precision operations on gfx94x and so are not included here. + + Example: + ```mlir + %0 = amdgpu.scaled_mfma 32x32x64 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 + : vector<4xf8E8M0FNU>, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<16xf32> + ``` }]; let assemblyFormat = [{ - `(` $scalesA `[` $scalesIdxA `]` `*` $sourceA `)` `*` `(` $scalesB `[` $scalesIdxB `]` `*` $sourceB `)` `+` $destC + custom<MNKDimensionList>($m, $n, $k) ` ` + `(` $scalesA `[` $scalesIdxA `]` `*` $sourceA `)` `*` + `(` $scalesB `[` $scalesIdxB `]` `*` $sourceB `)` `+` $destC attr-dict `:` type($scalesA) `,` type($sourceA) `,` type($scalesB) `,` type($sourceB) `,` type($destC) }]; diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index 4c4965e..585b6da 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -422,11 +422,11 @@ LogicalResult MFMAOp::verify() { Type sourceElem = sourceType, destElem = destType; uint32_t sourceLen = 1, destLen = 1; - if (auto sourceVector = llvm::dyn_cast<VectorType>(sourceType)) { + if (auto sourceVector = dyn_cast<VectorType>(sourceType)) { sourceLen = sourceVector.getNumElements(); sourceElem = sourceVector.getElementType(); } - if (auto destVector = llvm::dyn_cast<VectorType>(destType)) { + if (auto destVector = dyn_cast<VectorType>(destType)) { destLen = destVector.getNumElements(); destElem = destVector.getElementType(); } @@ -451,7 +451,7 @@ LogicalResult MFMAOp::verify() { return emitOpError("expected both non-small-float source operand types " "to match exactly"); } - // Normalize the wider integer types the compiler expects to i8 + // Normalize the wider integer types the compiler expects to i8. if (sourceElem.isInteger(32)) { sourceLen *= 4; sourceElem = b.getI8Type(); diff --git a/mlir/test/Conversion/AMDGPUToROCDL/mfma-gfx950.mlir b/mlir/test/Conversion/AMDGPUToROCDL/mfma-gfx950.mlir index 39c31d5..c746d76 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/mfma-gfx950.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/mfma-gfx950.mlir @@ -8,46 +8,46 @@ func.func @mfma_to_rocdl(%arg0 : vector<8xf16>, %arg1 : vector<16xf32>, // CHECK: %[[c0:.+]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: rocdl.mfma.f32.32x32x16.f16{{.*}}: (vector<8xf16>, vector<8xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg0 * %arg0 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xf16>, vector<8xf16>, vector<16xf32> + amdgpu.mfma 32x32x16 %arg0 * %arg0 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf16>, vector<8xf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x32.f16{{.*}}: (vector<8xf16>, vector<8xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg0 * %arg0 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf16>, vector<8xf16>, vector<4xf32> + amdgpu.mfma 16x16x32 %arg0 * %arg0 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf16>, vector<8xf16>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x16.bf16{{.*}}: (vector<8xbf16>, vector<8xbf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg3 * %arg3 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xbf16>, vector<8xbf16>, vector<16xf32> + amdgpu.mfma 32x32x16 %arg3 * %arg3 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xbf16>, vector<8xbf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x32.bf16{{.*}}: (vector<8xbf16>, vector<8xbf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg3 * %arg3 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xbf16>, vector<8xbf16>, vector<4xf32> + amdgpu.mfma 16x16x32 %arg3 * %arg3 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xbf16>, vector<8xbf16>, vector<4xf32> // CHECK: rocdl.mfma.i32.32x32x32.i8{{.*}}: (vector<4xi32>, vector<4xi32>, vector<16xi32>, i32, i32, i32) -> vector<16xi32> - amdgpu.mfma %arg4 * %arg4 + %arg5 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<16xi8>, vector<16xi8>, vector<16xi32> + amdgpu.mfma 32x32x32 %arg4 * %arg4 + %arg5 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<16xi8>, vector<16xi8>, vector<16xi32> // CHECK: rocdl.mfma.i32.16x16x64.i8{{.*}}: (vector<4xi32>, vector<4xi32>, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - amdgpu.mfma %arg4 * %arg4 + %arg6 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<16xi8>, vector<16xi8>, vector<4xi32> + amdgpu.mfma 16x16x64 %arg4 * %arg4 + %arg6 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<16xi8>, vector<16xi8>, vector<4xi32> // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c0]], %[[c0]], %[[c0]], %[[c0]]{{.*}}: (vector<8xi32>, vector<8xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg7 * %arg7 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<32xf8E4M3FN>, vector<32xf8E4M3FN>, vector<16xf32> + amdgpu.mfma 32x32x64 %arg7 * %arg7 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf8E4M3FN>, vector<32xf8E4M3FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c0]], %[[c0]], %[[c0]], %[[c0]]{{.*}}: (vector<8xi32>, vector<8xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg7 * %arg7 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 128 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<32xf8E4M3FN>, vector<32xf8E4M3FN>, vector<4xf32> + amdgpu.mfma 16x16x128 %arg7 * %arg7 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf8E4M3FN>, vector<32xf8E4M3FN>, vector<4xf32> // CHECK: %[[c1:.+]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c1]], %[[c1]], %[[c0]], %[[c0]]{{.*}}: (vector<8xi32>, vector<8xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg8 * %arg8 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<32xf8E5M2>, vector<32xf8E5M2>, vector<16xf32> + amdgpu.mfma 32x32x64 %arg8 * %arg8 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf8E5M2>, vector<32xf8E5M2>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c1]], %[[c1]], %[[c0]], %[[c0]]{{.*}}: (vector<8xi32>, vector<8xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg8 * %arg8 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 128 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<32xf8E5M2>, vector<32xf8E5M2>, vector<4xf32> + amdgpu.mfma 16x16x128 %arg8 * %arg8 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf8E5M2>, vector<32xf8E5M2>, vector<4xf32> // CHECK: %[[c2:.+]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c2]], %[[c2]], %[[c0]], %[[c0]]{{.*}}: (vector<6xi32>, vector<6xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg9 * %arg9 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf6E2M3FN>, vector<16xf32> + amdgpu.mfma 32x32x64 %arg9 * %arg9 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf6E2M3FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c2]], %[[c2]], %[[c0]], %[[c0]]{{.*}}: (vector<6xi32>, vector<6xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 128 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf6E2M3FN>, vector<4xf32> + amdgpu.mfma 16x16x128 %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf6E2M3FN>, vector<4xf32> // CHECK: %[[c3:.+]] = llvm.mlir.constant(3 : i32) : i32 // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c3]], %[[c3]], %[[c0]], %[[c0]]{{.*}}: (vector<6xi32>, vector<6xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg10 * %arg10 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<32xf6E3M2FN>, vector<32xf6E3M2FN>, vector<16xf32> + amdgpu.mfma 32x32x64 %arg10 * %arg10 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf6E3M2FN>, vector<32xf6E3M2FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c3]], %[[c3]], %[[c0]], %[[c0]]{{.*}}: (vector<6xi32>, vector<6xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 128 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<32xf6E3M2FN>, vector<32xf6E3M2FN>, vector<4xf32> + amdgpu.mfma 16x16x128 %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf6E3M2FN>, vector<32xf6E3M2FN>, vector<4xf32> // CHECK-DAG: %[[c4:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c4]], %[[c4]], %[[c0]], %[[c0]]{{.*}}: (vector<4xi32>, vector<4xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg11 * %arg11 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<32xf4E2M1FN>, vector<32xf4E2M1FN>, vector<16xf32> + amdgpu.mfma 32x32x64 %arg11 * %arg11 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf4E2M1FN>, vector<32xf4E2M1FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c4]], %[[c4]], %[[c0]], %[[c0]]{{.*}}: (vector<4xi32>, vector<4xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg11 * %arg11 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 128 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<32xf4E2M1FN>, vector<32xf4E2M1FN>, vector<4xf32> + amdgpu.mfma 16x16x128 %arg11 * %arg11 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf4E2M1FN>, vector<32xf4E2M1FN>, vector<4xf32> // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c2]], %[[c4]], %[[c0]], %[[c0]]{{.*}}: (vector<6xi32>, vector<4xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg9 * %arg11 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 64 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf4E2M1FN>, vector<16xf32> + amdgpu.mfma 32x32x64 %arg9 * %arg11 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf4E2M1FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c2]], %[[c4]], %[[c0]], %[[c0]]{{.*}}: (vector<6xi32>, vector<4xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg9 * %arg11 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 128 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf4E2M1FN>, vector<4xf32> + amdgpu.mfma 16x16x128 %arg9 * %arg11 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<32xf6E2M3FN>, vector<32xf4E2M1FN>, vector<4xf32> func.return } @@ -55,50 +55,50 @@ func.func @mfma_to_rocdl(%arg0 : vector<8xf16>, %arg1 : vector<16xf32>, // CHECK-LABEL: func @scaled_mfma_to_rocdl( // CHECK-SAME: %[[ARG0:.*]]: vector<16xf32>, %[[ARG1:.*]]: vector<4xf32>, %[[ARG2:.*]]: vector<32xf8E4M3FN>, %[[ARG3:.*]]: vector<32xf8E5M2>, %[[ARG4:.*]]: vector<32xf6E2M3FN>, %[[ARG5:.*]]: vector<32xf6E3M2FN>, %[[ARG6:.*]]: vector<32xf4E2M1FN>, %[[ARG7:.*]]: vector<4xf8E8M0FNU>, %[[ARG8:.*]]: f8E8M0FNU func.func @scaled_mfma_to_rocdl(%arg0 : vector<16xf32>, - %arg1 : vector<4xf32>, %arg2 : vector<32xf8E4M3FN>, - %arg3 : vector<32xf8E5M2>, %arg4 : vector<32xf6E2M3FN>, - %arg5 : vector<32xf6E3M2FN>, %arg6 : vector<32xf4E2M1FN>, - %arg7 : vector<4xf8E8M0FNU>, %arg8 : f8E8M0FNU) { - + %arg1 : vector<4xf32>, %arg2 : vector<32xf8E4M3FN>, + %arg3 : vector<32xf8E5M2>, %arg4 : vector<32xf6E2M3FN>, + %arg5 : vector<32xf6E3M2FN>, %arg6 : vector<32xf4E2M1FN>, + %arg7 : vector<4xf8E8M0FNU>, %arg8 : f8E8M0FNU) { + // CHECK: %[[c0:.+]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[c1:.+]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[b0:.+]] = llvm.bitcast {{.*}} : vector<4xi8> to i32 // CHECK: %[[z0:.+]] = llvm.zext {{.*}} : i8 to i32 // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<8xi32>, vector<8xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg2) * (%arg8[1] * %arg2) + %arg0 { k = 64 : i32, m = 32 : i32, n = 32 : i32 } : vector<4xf8E8M0FNU>, vector<32xf8E4M3FN>, f8E8M0FNU, vector<32xf8E4M3FN>, vector<16xf32> + amdgpu.scaled_mfma 32x32x64 (%arg7[0] * %arg2) * (%arg8[1] * %arg2) + %arg0 : vector<4xf8E8M0FNU>, vector<32xf8E4M3FN>, f8E8M0FNU, vector<32xf8E4M3FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<8xi32>, vector<8xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg2) * (%arg8[1] * %arg2) + %arg1 { k = 128 : i32, m = 16 : i32, n = 16 : i32 } : vector<4xf8E8M0FNU>, vector<32xf8E4M3FN>, f8E8M0FNU, vector<32xf8E4M3FN>, vector<4xf32> - + amdgpu.scaled_mfma 16x16x128 (%arg7[0] * %arg2) * (%arg8[1] * %arg2) + %arg1 : vector<4xf8E8M0FNU>, vector<32xf8E4M3FN>, f8E8M0FNU, vector<32xf8E4M3FN>, vector<4xf32> + // CHECK: llvm.bitcast - + // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<8xi32>, vector<8xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg3) * (%arg8[1] * %arg3) + %arg0 { k = 64 : i32, m = 32 : i32, n = 32 : i32 } : vector<4xf8E8M0FNU>, vector<32xf8E5M2>, f8E8M0FNU, vector<32xf8E5M2>, vector<16xf32> + amdgpu.scaled_mfma 32x32x64 (%arg7[0] * %arg3) * (%arg8[1] * %arg3) + %arg0 : vector<4xf8E8M0FNU>, vector<32xf8E5M2>, f8E8M0FNU, vector<32xf8E5M2>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<8xi32>, vector<8xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg3) * (%arg8[1] * %arg3) + %arg1 { k = 128 : i32, m = 16 : i32, n = 16 : i32 } : vector<4xf8E8M0FNU>, vector<32xf8E5M2>, f8E8M0FNU, vector<32xf8E5M2>, vector<4xf32> - + amdgpu.scaled_mfma 16x16x128 (%arg7[0] * %arg3) * (%arg8[1] * %arg3) + %arg1 : vector<4xf8E8M0FNU>, vector<32xf8E5M2>, f8E8M0FNU, vector<32xf8E5M2>, vector<4xf32> + // CHECK: llvm.bitcast - + // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<6xi32>, vector<6xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg4) * (%arg8[1] * %arg4) + %arg0 { k = 64 : i32, m = 32 : i32, n = 32 : i32 } : vector<4xf8E8M0FNU>, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<16xf32> + amdgpu.scaled_mfma 32x32x64 (%arg7[0] * %arg4) * (%arg8[1] * %arg4) + %arg0 : vector<4xf8E8M0FNU>, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<6xi32>, vector<6xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg4) * (%arg8[1] * %arg4) + %arg1 { k = 128 : i32, m = 16 : i32, n = 16 : i32 } : vector<4xf8E8M0FNU>, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<4xf32> - + amdgpu.scaled_mfma 16x16x128 (%arg7[0] * %arg4) * (%arg8[1] * %arg4) + %arg1 : vector<4xf8E8M0FNU>, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<4xf32> + // CHECK: llvm.bitcast // CHECK: llvm.mlir.constant(3 : i32) : i32 // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<6xi32>, vector<6xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg5) * (%arg8[1] * %arg5) + %arg0 { k = 64 : i32, m = 32 : i32, n = 32 : i32 } : vector<4xf8E8M0FNU>, vector<32xf6E3M2FN>, f8E8M0FNU, vector<32xf6E3M2FN>, vector<16xf32> + amdgpu.scaled_mfma 32x32x64 (%arg7[0] * %arg5) * (%arg8[1] * %arg5) + %arg0 : vector<4xf8E8M0FNU>, vector<32xf6E3M2FN>, f8E8M0FNU, vector<32xf6E3M2FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<6xi32>, vector<6xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg5) * (%arg8[1] * %arg5) + %arg1 { k = 128 : i32, m = 16 : i32, n = 16 : i32 } : vector<4xf8E8M0FNU>, vector<32xf6E3M2FN>, f8E8M0FNU, vector<32xf6E3M2FN>, vector<4xf32> - + amdgpu.scaled_mfma 16x16x128 (%arg7[0] * %arg5) * (%arg8[1] * %arg5) + %arg1 : vector<4xf8E8M0FNU>, vector<32xf6E3M2FN>, f8E8M0FNU, vector<32xf6E3M2FN>, vector<4xf32> + // CHECK: llvm.bitcast // CHECK: llvm.mlir.constant(4 : i32) : i32 - + // CHECK: rocdl.mfma.scale.f32.32x32x64.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<4xi32>, vector<4xi32>, vector<16xf32>, i32, i32, i32, i32, i32, i32) -> vector<16xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg6) * (%arg8[1] * %arg6) + %arg0 { k = 64 : i32, m = 32 : i32, n = 32 : i32 } : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, f8E8M0FNU, vector<32xf4E2M1FN>, vector<16xf32> + amdgpu.scaled_mfma 32x32x64 (%arg7[0] * %arg6) * (%arg8[1] * %arg6) + %arg0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, f8E8M0FNU, vector<32xf4E2M1FN>, vector<16xf32> // CHECK: rocdl.mfma.scale.f32.16x16x128.f8f6f4{{.*}}, %[[c0]], %[[b0]], %[[c1]], %[[z0]] : (vector<4xi32>, vector<4xi32>, vector<4xf32>, i32, i32, i32, i32, i32, i32) -> vector<4xf32> - amdgpu.scaled_mfma(%arg7[0] * %arg6) * (%arg8[1] * %arg6) + %arg1 { k = 128 : i32, m = 16 : i32, n = 16 : i32 } : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, f8E8M0FNU, vector<32xf4E2M1FN>, vector<4xf32> + amdgpu.scaled_mfma 16x16x128 (%arg7[0] * %arg6) * (%arg8[1] * %arg6) + %arg1 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, f8E8M0FNU, vector<32xf4E2M1FN>, vector<4xf32> func.return } diff --git a/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir b/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir index 52db142..e292d98 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir @@ -9,89 +9,89 @@ func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, %arg14 : vector<2xf32>, %arg15 : vector<8xf8E5M2FNUZ>, %arg16 : vector<8xf8E4M3FNUZ>) { // CHECK: rocdl.mfma.f32.32x32x1f32{{.*}}: (f32, f32, vector<32xf32>, i32, i32, i32) -> vector<32xf32> - amdgpu.mfma %arg0 * %arg0 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 1 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : f32, f32, vector<32xf32> + amdgpu.mfma 32x32x1 %arg0 * %arg0 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, blocks = 2 : i32 } blgp = none : f32, f32, vector<32xf32> // CHECK: rocdl.mfma.f32.16x16x1f32{{.*}}: (f32, f32, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg0 * %arg0 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 1 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : f32, f32, vector<16xf32> + amdgpu.mfma 16x16x1 %arg0 * %arg0 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, blocks = 4 : i32 } blgp = none : f32, f32, vector<16xf32> // CHECK: rocdl.mfma.f32.4x4x1f32{{.*}}: (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg0 * %arg0 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 1 : i32, m = 4 : i32, n = 4 : i32, blocks = 16 : i32 } blgp = none : f32, f32, vector<4xf32> + amdgpu.mfma 4x4x1 %arg0 * %arg0 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, blocks = 16 : i32 } blgp = none : f32, f32, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x2f32{{.*}}: (f32, f32, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg0 * %arg0 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 2 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : f32, f32, vector<16xf32> + amdgpu.mfma 32x32x2 %arg0 * %arg0 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f32, f32, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x4f32{{.*}}: (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg0 * %arg0 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : f32, f32, vector<4xf32> + amdgpu.mfma 16x16x4 %arg0 * %arg0 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f32, f32, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x4f16{{.*}}: (vector<4xf16>, vector<4xf16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> - amdgpu.mfma %arg4 * %arg4 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<32xf32> + amdgpu.mfma 32x32x4 %arg4 * %arg4 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, blocks = 2 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<32xf32> // CHECK: rocdl.mfma.f32.16x16x4f16{{.*}}: (vector<4xf16>, vector<4xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg4 * %arg4 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<16xf32> + amdgpu.mfma 16x16x4 %arg4 * %arg4 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, blocks = 4 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.4x4x4f16{{.*}}: (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg4 * %arg4 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 4 : i32, n = 4 : i32, blocks = 16 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> + amdgpu.mfma 4x4x4 %arg4 * %arg4 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, blocks = 16 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x8f16{{.*}}: (vector<4xf16>, vector<4xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg4 * %arg4 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<16xf32> + amdgpu.mfma 32x32x8 %arg4 * %arg4 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x16f16{{.*}}: (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg4 * %arg4 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> + amdgpu.mfma 16x16x16 %arg4 * %arg4 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> // CHECK: %[[BITCAST_4xi8_i32:.+]] = llvm.bitcast {{.*}} : vector<4xi8> to i32 // CHECK: rocdl.mfma.i32.32x32x4i8 %[[BITCAST_4xi8_i32]], %[[BITCAST_4xi8_i32]], {{.*}}: (i32, i32, vector<32xi32>, i32, i32, i32) -> vector<32xi32> - amdgpu.mfma %arg5 * %arg5 + %arg6 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<32xi32> + amdgpu.mfma 32x32x4 %arg5 * %arg5 + %arg6 { abid = 0 : i32, cbsz = 0 : i32, blocks = 2 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<32xi32> // CHECK: rocdl.mfma.i32.16x16x4i8{{.*}}: (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32> - amdgpu.mfma %arg5 * %arg5 + %arg7 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<16xi32> + amdgpu.mfma 16x16x4 %arg5 * %arg5 + %arg7 { abid = 0 : i32, cbsz = 0 : i32, blocks = 4 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<16xi32> // CHECK: rocdl.mfma.i32.4x4x4i8{{.*}}: (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - amdgpu.mfma %arg5 * %arg5 + %arg8 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 4 : i32, n = 4 : i32, blocks = 16 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<4xi32> + amdgpu.mfma 4x4x4 %arg5 * %arg5 + %arg8 { abid = 0 : i32, cbsz = 0 : i32, blocks = 16 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<4xi32> // CHECK: rocdl.mfma.i32.32x32x8i8{{.*}}: (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32> - amdgpu.mfma %arg5 * %arg5 + %arg7 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<16xi32> + amdgpu.mfma 32x32x8 %arg5 * %arg5 + %arg7 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<16xi32> // CHECK: rocdl.mfma.i32.16x16x16i8{{.*}}: (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - amdgpu.mfma %arg5 * %arg5 + %arg8 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<4xi32> + amdgpu.mfma 16x16x16 %arg5 * %arg5 + %arg8 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<4xi32> // CHECK: %[[BITCAST_2xbf16_2xi16:.+]] = llvm.bitcast {{.*}} : vector<2xbf16> to vector<2xi16> // CHECK: rocdl.mfma.f32.32x32x2bf16 %[[BITCAST_2xbf16_2xi16]], %[[BITCAST_2xbf16_2xi16]], %{{.*}}: (vector<2xi16>, vector<2xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> - amdgpu.mfma %arg9 * %arg9 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 2 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<32xf32> + amdgpu.mfma 32x32x2 %arg9 * %arg9 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, blocks = 2 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<32xf32> // CHECK: rocdl.mfma.f32.16x16x2bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 2 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<16xf32> + amdgpu.mfma 16x16x2 %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, blocks = 4 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.4x4x2bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg9 * %arg9 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 2 : i32, m = 4 : i32, n = 4 : i32, blocks = 16 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<4xf32> + amdgpu.mfma 4x4x2 %arg9 * %arg9 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, blocks = 16 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x4bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<16xf32> + amdgpu.mfma 32x32x4 %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x8bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg9 * %arg9 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<4xf32> + amdgpu.mfma 16x16x8 %arg9 * %arg9 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<4xf32> // CHECK: %[[BITCAST_4xbf16_4xi16:.+]] = llvm.bitcast {{.*}} : vector<4xbf16> to vector<4xi16> // CHECK: rocdl.mfma.f32.32x32x4bf16.1k %[[BITCAST_4xbf16_4xi16]], %[[BITCAST_4xbf16_4xi16]], {{.*}}: (vector<4xi16>, vector<4xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> - amdgpu.mfma %arg10 * %arg10 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<32xf32> + amdgpu.mfma 32x32x4 %arg10 * %arg10 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, blocks = 2 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<32xf32> // CHECK: rocdl.mfma.f32.16x16x4bf16.1k{{.*}}: (vector<4xi16>, vector<4xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<16xf32> + amdgpu.mfma 16x16x4 %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, blocks = 4 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.4x4x4bf16.1k{{.*}}: (vector<4xi16>, vector<4xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg10 * %arg10 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 4 : i32, n = 4 : i32, blocks = 16 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<4xf32> + amdgpu.mfma 4x4x4 %arg10 * %arg10 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, blocks = 16 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x8bf16.1k{{.*}}: (vector<4xi16>, vector<4xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<16xf32> + amdgpu.mfma 32x32x8 %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x16bf16.1k{{.*}}: (vector<4xi16>, vector<4xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg10 * %arg10 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<4xf32> + amdgpu.mfma 16x16x16 %arg10 * %arg10 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<4xf32> // CHECK: rocdl.mfma.f64.16x16x4f64{{.*}}: (f64, f64, vector<4xf64>, i32, i32, i32) -> vector<4xf64> - amdgpu.mfma %arg11 * %arg11 + %arg12 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : f64, f64, vector<4xf64> + amdgpu.mfma 16x16x4 %arg11 * %arg11 + %arg12 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f64, f64, vector<4xf64> // CHECK: rocdl.mfma.f64.4x4x4f64{{.*}}: (f64, f64, f64, i32, i32, i32) -> f64 - amdgpu.mfma %arg11 * %arg11 + %arg11 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 4 : i32, n = 4 : i32, blocks = 4 : i32 } blgp = none : f64, f64, f64 + amdgpu.mfma 4x4x4 %arg11 * %arg11 + %arg11 { abid = 0 : i32, cbsz = 0 : i32, blocks = 4 : i32 } blgp = none : f64, f64, f64 // CHECK: %[[BITCAST_8xi8_i64:.+]] = llvm.bitcast {{.*}} : vector<8xi8> to i64 // CHECK: rocdl.mfma.i32.16x16x32.i8 %[[BITCAST_8xi8_i64]], %[[BITCAST_8xi8_i64]], {{.*}}: (i64, i64, vector<4xi32>, i32, i32, i32) -> vector<4xi32> - amdgpu.mfma %arg13 * %arg13 + %arg8 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xi8>, vector<8xi8>, vector<4xi32> + amdgpu.mfma 16x16x32 %arg13 * %arg13 + %arg8 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xi8>, vector<8xi8>, vector<4xi32> // CHECK: rocdl.mfma.i32.32x32x16.i8{{.*}}: (i64, i64, vector<16xi32>, i32, i32, i32) -> vector<16xi32> - amdgpu.mfma %arg13 * %arg13 + %arg7 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xi8>, vector<8xi8>, vector<16xi32> + amdgpu.mfma 32x32x16 %arg13 * %arg13 + %arg7 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xi8>, vector<8xi8>, vector<16xi32> // CHECK: rocdl.mfma.f32.16x16x8.xf32{{.*}}: (vector<2xf32>, vector<2xf32>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg14 * %arg14 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32, reducePrecision } blgp = none : vector<2xf32>, vector<2xf32>, vector<4xf32> + amdgpu.mfma 16x16x8 %arg14 * %arg14 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, reducePrecision } blgp = none : vector<2xf32>, vector<2xf32>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x4.xf32{{.*}}: (vector<2xf32>, vector<2xf32>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg14 * %arg14 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32, reducePrecision } blgp = none : vector<2xf32>, vector<2xf32>, vector<16xf32> + amdgpu.mfma 32x32x4 %arg14 * %arg14 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, reducePrecision } blgp = none : vector<2xf32>, vector<2xf32>, vector<16xf32> // CHECK: %[[BITCAST_8xi8_i64_1:.+]] = llvm.bitcast {{.*}} : vector<8xi8> to i64 // CHECK: rocdl.mfma.f32.16x16x32.bf8.bf8 %[[BITCAST_8xi8_i64_1]], %[[BITCAST_8xi8_i64_1]], {{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg15 * %arg15 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E5M2FNUZ>, vector<4xf32> + amdgpu.mfma 16x16x32 %arg15 * %arg15 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E5M2FNUZ>, vector<4xf32> // CHECK: %[[BITCAST_8xi8_i64_2:.+]] = llvm.bitcast {{.*}} : vector<8xi8> to i64 // CHECK: rocdl.mfma.f32.16x16x32.bf8.fp8 %[[BITCAST_8xi8_i64_1]], %[[BITCAST_8xi8_i64_2]], {{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg15 * %arg16 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E4M3FNUZ>, vector<4xf32> + amdgpu.mfma 16x16x32 %arg15 * %arg16 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E4M3FNUZ>, vector<4xf32> // CHECK: rocdl.mfma.f32.16x16x32.fp8.bf8{{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg16 * %arg15 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E5M2FNUZ>, vector<4xf32> + amdgpu.mfma 16x16x32 %arg16 * %arg15 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E5M2FNUZ>, vector<4xf32> // CHECK: rocdl.mfma.f32.16x16x32.fp8.fp8{{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> - amdgpu.mfma %arg16 * %arg16 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E4M3FNUZ>, vector<4xf32> + amdgpu.mfma 16x16x32 %arg16 * %arg16 + %arg3 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E4M3FNUZ>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x16.bf8.bf8{{.*}}: (i64, i64, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg15 * %arg15 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E5M2FNUZ>, vector<16xf32> + amdgpu.mfma 32x32x16 %arg15 * %arg15 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E5M2FNUZ>, vector<16xf32> // CHECK: rocdl.mfma.f32.32x32x16.bf8.fp8{{.*}}: (i64, i64, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg15 * %arg16 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E4M3FNUZ>, vector<16xf32> + amdgpu.mfma 32x32x16 %arg15 * %arg16 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E4M3FNUZ>, vector<16xf32> // CHECK: rocdl.mfma.f32.32x32x16.fp8.bf8{{.*}}: (i64, i64, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg16 * %arg15 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E5M2FNUZ>, vector<16xf32> + amdgpu.mfma 32x32x16 %arg16 * %arg15 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E5M2FNUZ>, vector<16xf32> // CHECK: rocdl.mfma.f32.32x32x16.fp8.fp8{{.*}}: (i64, i64, vector<16xf32>, i32, i32, i32) -> vector<16xf32> - amdgpu.mfma %arg16 * %arg16 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E4M3FNUZ>, vector<16xf32> + amdgpu.mfma 32x32x16 %arg16 * %arg16 + %arg2 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E4M3FNUZ>, vector<16xf32> func.return } diff --git a/mlir/test/Dialect/AMDGPU/canonicalize.mlir b/mlir/test/Dialect/AMDGPU/canonicalize.mlir index 52d3275..fee0c00 100644 --- a/mlir/test/Dialect/AMDGPU/canonicalize.mlir +++ b/mlir/test/Dialect/AMDGPU/canonicalize.mlir @@ -165,10 +165,10 @@ func.func @fold_gather_to_lds_of_cast_dest(%global: memref<128x72xf32, 1>, %lds: // CHECK-LABEL: func @scaled_mfma // CHECK: %[[SCALE_1:.*]] = vector.extract_strided_slice %0 {offsets = [0], sizes = [4], strides = [1]} : vector<16xf8E8M0FNU> to vector<4xf8E8M0FNU> // CHECK: %[[SCALE_2:.*]] = vector.extract_strided_slice %2 {offsets = [4], sizes = [4], strides = [1]} : vector<16xf8E8M0FNU> to vector<4xf8E8M0FNU> -// CHECK: amdgpu.scaled_mfma(%[[SCALE_1]][3] * %{{.*}}) * (%[[SCALE_2]][2] * %{{.*}}) {{.*}} +// CHECK: amdgpu.scaled_mfma 16x16x128 (%[[SCALE_1]][3] * %{{.*}}) * (%[[SCALE_2]][2] * %{{.*}}) {{.*}} // CHECK: %[[SCALE_3:.*]] = vector.extract_strided_slice %5 {offsets = [8], sizes = [4], strides = [1]} : vector<16xf8E8M0FNU> to vector<4xf8E8M0FNU> // CHECK: %[[SCALE_4:.*]] = vector.extract_strided_slice %7 {offsets = [12], sizes = [4], strides = [1]} : vector<16xf8E8M0FNU> to vector<4xf8E8M0FNU> -// CHECK: amdgpu.scaled_mfma(%[[SCALE_3]][1] * %{{.*}}) * (%[[SCALE_4]][0] * %{{.*}}) {{.*}} +// CHECK: amdgpu.scaled_mfma 16x16x128 (%[[SCALE_3]][1] * %{{.*}}) * (%[[SCALE_4]][0] * %{{.*}}) {{.*}} func.func @scaled_mfma(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4E2M1FN>, %scalesA: vector<2x1x8x1xf8E8M0FNU>, %scalesB: vector<2x1x8x1xf8E8M0FNU>) -> (vector<4xf32>, vector<4xf32>) { %cst_0 = arith.constant dense<0.000000e+00> : vector<4xf32> %cst_1 = arith.constant dense<5.877470e-39> : vector<4xf8E8M0FNU> @@ -176,12 +176,12 @@ func.func @scaled_mfma(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4E2M1FN>, %sc %sA = vector.insert %scaleA, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> %scaleB = vector.extract %scalesB[0, 0, 6, 0] : f8E8M0FNU from vector<2x1x8x1xf8E8M0FNU> %sB = vector.insert %scaleB, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> - %res_0 = amdgpu.scaled_mfma(%sA[0] * %opA) * (%sB[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + %res_0 = amdgpu.scaled_mfma 16x16x128 (%sA[0] * %opA) * (%sB[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> %scaleC = vector.extract %scalesA[1, 0, 1, 0] : f8E8M0FNU from vector<2x1x8x1xf8E8M0FNU> %sC = vector.insert %scaleC, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> %scaleD = vector.extract %scalesB[1, 0, 4, 0] : f8E8M0FNU from vector<2x1x8x1xf8E8M0FNU> %sD = vector.insert %scaleD, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> - %res_1 = amdgpu.scaled_mfma(%sC[0] * %opA) * (%sD[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + %res_1 = amdgpu.scaled_mfma 16x16x128 (%sC[0] * %opA) * (%sD[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> return %res_0, %res_1 : vector<4xf32>, vector<4xf32> } @@ -192,7 +192,7 @@ func.func @scaled_mfma(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4E2M1FN>, %sc // CHECK: vector.insert {{.*}} : f8E8M0FNU into vector<4xf8E8M0FNU> // CHECK: vector.extract {{.*}} : f8E8M0FNU from vector<2xf8E8M0FNU> // CHECK: vector.insert {{.*}} : f8E8M0FNU into vector<4xf8E8M0FNU> -// CHECK: amdgpu.scaled_mfma({{.*}}[0] * {{.*}}) * ({{.*}}[0] * {{.*}} +// CHECK: amdgpu.scaled_mfma 16x16x128 ({{.*}}[0] * {{.*}}) * ({{.*}}[0] * {{.*}} func.func @scaled_mfma_less_than_4(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4E2M1FN>, %scalesA: vector<2xf8E8M0FNU>, %scalesB: vector<2xf8E8M0FNU>) -> vector<4xf32> { %cst_0 = arith.constant dense<0.000000e+00> : vector<4xf32> %cst_1 = arith.constant dense<5.877470e-39> : vector<4xf8E8M0FNU> @@ -200,17 +200,17 @@ func.func @scaled_mfma_less_than_4(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4 %sA = vector.insert %scaleA, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> %scaleB = vector.extract %scalesB[1] : f8E8M0FNU from vector<2xf8E8M0FNU> %sB = vector.insert %scaleB, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> - %res_0 = amdgpu.scaled_mfma(%sA[0] * %opA) * (%sB[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + %res_0 = amdgpu.scaled_mfma 16x16x128 (%sA[0] * %opA) * (%sB[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> return %res_0 : vector<4xf32> } // ----- // CHECK-LABEL: func @scaled_mfma_ugly_shapes -// CHECK: amdgpu.scaled_mfma(%{{.*}}[0] * %{{.*}}) * (%{{.*}}[3] * %arg1) + %cst {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> -// CHECK: amdgpu.scaled_mfma(%{{.*}}[1] * %{{.*}}) * (%{{.*}}[3] * %arg1) + %cst {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> -// CHECK: amdgpu.scaled_mfma(%{{.*}}[2] * %{{.*}}) * (%{{.*}}[2] * %arg1) + %cst {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> -// CHECK: amdgpu.scaled_mfma(%{{.*}}[3] * %{{.*}}) * (%{{.*}}[1] * %arg1) + %cst {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> +// CHECK: amdgpu.scaled_mfma 16x16x128 (%{{.*}}[0] * %{{.*}}) * (%{{.*}}[3] * %arg1) + %cst : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> +// CHECK: amdgpu.scaled_mfma 16x16x128 (%{{.*}}[1] * %{{.*}}) * (%{{.*}}[3] * %arg1) + %cst : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> +// CHECK: amdgpu.scaled_mfma 16x16x128 (%{{.*}}[2] * %{{.*}}) * (%{{.*}}[2] * %arg1) + %cst : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> +// CHECK: amdgpu.scaled_mfma 16x16x128 (%{{.*}}[3] * %{{.*}}) * (%{{.*}}[1] * %arg1) + %cst : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> func.func @scaled_mfma_ugly_shapes(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4E2M1FN>, %scalesA: vector<5x5xf8E8M0FNU>, %scalesB: vector<7x23xf8E8M0FNU>) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) { %cst_0 = arith.constant dense<0.000000e+00> : vector<4xf32> %cst_1 = arith.constant dense<5.877470e-39> : vector<4xf8E8M0FNU> @@ -237,10 +237,10 @@ func.func @scaled_mfma_ugly_shapes(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4 %sB_6_21 = vector.insert %scaleB_6_21, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> %sB_6_20 = vector.insert %scaleB_6_20, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> %sB_6_19 = vector.insert %scaleB_6_19, %cst_1 [0] : f8E8M0FNU into vector<4xf8E8M0FNU> - - %res_4 = amdgpu.scaled_mfma(%sA_0_4[0] * %opA) * (%sB_6_22[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> - %res_5 = amdgpu.scaled_mfma(%sA_0_5[0] * %opA) * (%sB_6_21[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> - %res_6 = amdgpu.scaled_mfma(%sA_0_6[0] * %opA) * (%sB_6_20[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> - %res_7 = amdgpu.scaled_mfma(%sA_0_7[0] * %opA) * (%sB_6_19[0] * %opB) + %cst_0 {k = 128 : i32, m = 16 : i32, n = 16 : i32} : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + + %res_4 = amdgpu.scaled_mfma 16x16x128 (%sA_0_4[0] * %opA) * (%sB_6_22[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + %res_5 = amdgpu.scaled_mfma 16x16x128 (%sA_0_5[0] * %opA) * (%sB_6_21[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + %res_6 = amdgpu.scaled_mfma 16x16x128 (%sA_0_6[0] * %opA) * (%sB_6_20[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> + %res_7 = amdgpu.scaled_mfma 16x16x128 (%sA_0_7[0] * %opA) * (%sB_6_19[0] * %opB) + %cst_0 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf32> return %res_4, %res_5, %res_6, %res_7 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32> } diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir index 6a2518a..5784764 100644 --- a/mlir/test/Dialect/AMDGPU/invalid.mlir +++ b/mlir/test/Dialect/AMDGPU/invalid.mlir @@ -19,9 +19,7 @@ func.func @mixing_packed_stoch_round_types(%arg0: f32, %arg1: i32, %arg2: vector func.func @bad_source_types(%a: vector<2xf32>, %b: vector<4xf16>, %c: vector<32xf32>) -> vector<32xf32> { // expected-error@+1 {{'amdgpu.mfma' op expected both non-small-float source operand types to match exactly}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<2xf32>, vector<4xf16>, vector<32xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<2xf32>, vector<4xf16>, vector<32xf32> func.return %d : vector<32xf32> } @@ -30,9 +28,7 @@ func.func @bad_source_types(%a: vector<2xf32>, %b: vector<4xf16>, func.func @bad_source_types_f8(%a: vector<8xf8E5M2FNUZ>, %b: vector<8xi8>, %c: vector<32xf32>) -> vector<32xf32> { // expected-error@+1 {{'amdgpu.mfma' op expected both source operands to have small-float elements if one does}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<8xf8E5M2FNUZ>, vector<8xi8>, vector<32xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<8xf8E5M2FNUZ>, vector<8xi8>, vector<32xf32> func.return %d : vector<32xf32> } @@ -41,9 +37,7 @@ func.func @bad_source_types_f8(%a: vector<8xf8E5M2FNUZ>, %b: vector<8xi8>, func.func @bad_source_arguments(%a: vector<2xf32>, %b: vector<2xf32>, %c: vector<32xf32>) -> vector<32xf32> { // expected-error@+1 {{'amdgpu.mfma' op expected 1 source values for this operation but got 2}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<2xf32>, vector<2xf32>, vector<32xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<2xf32>, vector<2xf32>, vector<32xf32> func.return %d : vector<32xf32> } @@ -52,9 +46,7 @@ func.func @bad_source_arguments(%a: vector<2xf32>, %b: vector<2xf32>, func.func @bad_source_arguments_i8(%a: vector<8xi8>, %b: vector<8xi8>, %c: vector<4xi32>) -> vector<4xi32> { // expected-error@+1 {{'amdgpu.mfma' op expected 4 source values for this operation but got 8}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 4 : i32, blocks = 2 : i32, - abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<8xi8>, vector<8xi8>, vector<4xi32> + %d = amdgpu.mfma 32x32x4 %a * %b + %c { blocks = 2 : i32, abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<8xi8>, vector<8xi8>, vector<4xi32> func.return %d : vector<4xi32> } @@ -62,9 +54,7 @@ func.func @bad_source_arguments_i8(%a: vector<8xi8>, %b: vector<8xi8>, func.func @bad_dest_type(%a: f32, %b: f32, %c: vector<16xf32>) -> vector<16xf32> { // expected-error@+1 {{'amdgpu.mfma' op expected 32 result values for this operation but got 16}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 0 : i32, cbsz = 0 : i32} blgp = none : f32, f32, vector<16xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 0 : i32, cbsz = 0 : i32} blgp = none : f32, f32, vector<16xf32> return %d : vector<16xf32> } @@ -72,9 +62,7 @@ func.func @bad_dest_type(%a: f32, %b: f32, %c: vector<16xf32>) -> vector<16xf32> func.func @f64_permuting_b(%a: f64, %b: f64, %c: vector<4xf64>) -> vector<4xf64> { // expected-error@+1 {{'amdgpu.mfma' op double-precision ops do not support permuting lanes of B}} - %d = amdgpu.mfma %a * %b + %c { - m = 16 : i32, n = 16 : i32, k = 4 : i32, blocks = 1 : i32, - abid = 0 : i32, cbsz = 0 : i32} blgp = bcast_first_32 : f64, f64, vector<4xf64> + %d = amdgpu.mfma 16x16x4 %a * %b + %c { abid = 0 : i32, cbsz = 0 : i32} blgp = bcast_first_32 : f64, f64, vector<4xf64> return %d : vector<4xf64> } @@ -82,9 +70,7 @@ func.func @f64_permuting_b(%a: f64, %b: f64, %c: vector<4xf64>) -> vector<4xf64> func.func @f64_permuting_a(%a: f64, %b: f64, %c: vector<4xf64>) -> vector<4xf64> { // expected-error@+1 {{'amdgpu.mfma' op double-precision ops do not support permuting lanes of A}} - %d = amdgpu.mfma %a * %b + %c { - m = 16 : i32, n = 16 : i32, k = 4 : i32, blocks = 1 : i32, - abid = 0 : i32, cbsz = 1 : i32} blgp = none : f64, f64, vector<4xf64> + %d = amdgpu.mfma 16x16x4 %a * %b + %c { abid = 0 : i32, cbsz = 1 : i32} blgp = none : f64, f64, vector<4xf64> return %d : vector<4xf64> } @@ -92,9 +78,7 @@ func.func @f64_permuting_a(%a: f64, %b: f64, %c: vector<4xf64>) -> vector<4xf64> func.func @abid_without_bradcast(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { // expected-error@+1 {{'amdgpu.mfma' op block ID for permuting A (abid) must be below 2 ** cbsz}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 1 : i32, cbsz = 0 : i32} blgp = none : f32, f32, vector<32xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 1 : i32, cbsz = 0 : i32} blgp = none : f32, f32, vector<32xf32> func.return %d : vector<32xf32> } @@ -102,9 +86,7 @@ func.func @abid_without_bradcast(%a: f32, %b: f32, %c: vector<32xf32>) -> vector func.func @abid_too_large(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { // expected-error@+1 {{'amdgpu.mfma' op block ID for permuting A (abid) must be below 2 ** cbsz}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 2 : i32, cbsz = 1 : i32} blgp = none : f32, f32, vector<32xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 2 : i32, cbsz = 1 : i32} blgp = none : f32, f32, vector<32xf32> func.return %d : vector<32xf32> } @@ -112,9 +94,39 @@ func.func @abid_too_large(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32 func.func @no_negation(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { // expected-error@+1 {{'amdgpu.mfma' op negation flags only available for double-precision operations}} - %d = amdgpu.mfma %a * %b + %c { - m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32, - abid = 0 : i32, cbsz = 0 : i32, negateA} blgp = none : f32, f32, vector<32xf32> + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 2 : i32, abid = 0 : i32, cbsz = 0 : i32, negateA} blgp = none : f32, f32, vector<32xf32> + func.return %d : vector<32xf32> +} + +// ----- + +func.func @mfma_invalid_m(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { + // expected-error@+1 {{'amdgpu.mfma' op attribute 'm' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {4, 16, 32}}} + %d = amdgpu.mfma 7x32x1 %a * %b + %c { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f32, f32, vector<32xf32> + func.return %d : vector<32xf32> +} + +// ----- + +func.func @mfma_invalid_n(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { + // expected-error@+1 {{'amdgpu.mfma' op attribute 'n' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {4, 16, 32}}} + %d = amdgpu.mfma 32x7x1 %a * %b + %c { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f32, f32, vector<32xf32> + func.return %d : vector<32xf32> +} + +// ----- + +func.func @mfma_invalid_k(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { + // expected-error@+1 {{'amdgpu.mfma' op attribute 'k' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {1, 2, 4, 8, 16, 32, 64, 128}}} + %d = amdgpu.mfma 32x32x3 %a * %b + %c { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f32, f32, vector<32xf32> + func.return %d : vector<32xf32> +} + +// ----- + +func.func @mfma_invalid_blocks(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> { + // expected-error@+1 {{'amdgpu.mfma' op attribute 'blocks' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {1, 2, 4, 16}}} + %d = amdgpu.mfma 32x32x1 %a * %b + %c { blocks = 7 : i32, abid = 0 : i32, cbsz = 0 : i32 } blgp = none : f32, f32, vector<32xf32> func.return %d : vector<32xf32> } @@ -302,3 +314,27 @@ func.func @amdgpu.scaled_ext_packed816_invalid_input_output_sizes(%v: vector<8xf %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<16xf16> func.return } + +// ----- + +func.func @scaled_mfma_invalid_m(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32xf4E2M1FN>, %arg2 : vector<16xf32>) -> vector<16xf32> { + // expected-error@+1 {{'amdgpu.scaled_mfma' op attribute 'm' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {16, 32}}} + %0 = amdgpu.scaled_mfma 8x32x64 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<16xf32> + func.return %0 : vector<16xf32> +} + +// ----- + +func.func @scaled_mfma_invalid_n(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32xf4E2M1FN>, %arg2 : vector<16xf32>) -> vector<16xf32> { + // expected-error@+1 {{'amdgpu.scaled_mfma' op attribute 'n' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {16, 32}}} + %0 = amdgpu.scaled_mfma 32x8x64 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<16xf32> + func.return %0 : vector<16xf32> +} + +// ----- + +func.func @scaled_mfma_invalid_k(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32xf4E2M1FN>, %arg2 : vector<16xf32>) -> vector<16xf32> { + // expected-error@+1 {{'amdgpu.scaled_mfma' op attribute 'k' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {64, 128}}} + %0 = amdgpu.scaled_mfma 32x32x32 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<16xf32> + func.return %0 : vector<16xf32> +} diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir index a185eb6..a330967 100644 --- a/mlir/test/Dialect/AMDGPU/ops.mlir +++ b/mlir/test/Dialect/AMDGPU/ops.mlir @@ -559,9 +559,16 @@ func.func @sched_barrier() { } // CHECK-LABEL: func @mfma -func.func @mfma(%arg0 : f32, %arg1 : vector<32xf32>) -> vector<32xf32> { - // CHECK: amdgpu.mfma - %0 = amdgpu.mfma %arg0 * %arg0 + %arg1 { abid = 1 : i32, cbsz = 1 : i32, k = 1 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = bcast_second_32 : f32, f32, vector<32xf32> +func.func @mfma(%arg0 : vector<4xf16>, %arg1 : vector<4xf32>) -> vector<4xf32> { + // CHECK: amdgpu.mfma 16x16x16 + %0 = amdgpu.mfma 16x16x16 %arg0 * %arg0 + %arg1 { abid = 0 : i32, cbsz = 0 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> + func.return %0 : vector<4xf32> +} + +// CHECK-LABEL: func @mfma_with_blocks +func.func @mfma_with_blocks(%arg0 : f32, %arg1 : vector<32xf32>) -> vector<32xf32> { + // CHECK: amdgpu.mfma 32x32x1 + %0 = amdgpu.mfma 32x32x1 %arg0 * %arg0 + %arg1 { abid = 1 : i32, cbsz = 1 : i32, blocks = 2 : i32 } blgp = bcast_second_32 : f32, f32, vector<32xf32> func.return %0 : vector<32xf32> } @@ -602,8 +609,8 @@ func.func @permlane32_swap(%arg0 : f32) -> f32 { // CHECK-LABEL: func @scaled_mfma func.func @scaled_mfma(%arg0 : f8E8M0FNU, %arg1 : vector<32xf6E2M3FN>, %arg2 : vector<16xf32>) -> vector<16xf32> { - // CHECK: amdgpu.scaled_mfma - %0 = amdgpu.scaled_mfma(%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 { k = 64 : i32, m = 32 : i32, n = 32 : i32 } : f8E8M0FNU, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<16xf32> + // CHECK: amdgpu.scaled_mfma 32x32x64 + %0 = amdgpu.scaled_mfma 32x32x64 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 : f8E8M0FNU, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<16xf32> func.return %0 : vector<16xf32> } |
