Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp           | 12
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp                     |  3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  | 23
-rw-r--r--  llvm/lib/Frontend/HLSL/CBuffer.cpp                     |  9
-rw-r--r--  llvm/lib/Support/GlobPattern.cpp                       | 67
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td                       | 18
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp      |  8
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h                  |  6
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td             |  8
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td            | 13
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp    |  3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td           | 34
-rw-r--r--  llvm/lib/TargetParser/TargetParser.cpp                 |  2
13 files changed, 172 insertions, 34 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 1fe38d6..b49040b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1862,15 +1862,19 @@ bool IRTranslator::translateVectorDeinterleave2Intrinsic(
 
 void IRTranslator::getStackGuard(Register DstReg,
                                  MachineIRBuilder &MIRBuilder) {
+  Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent());
+  if (!Global) {
+    LLVMContext &Ctx = MIRBuilder.getContext();
+    Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard"));
+    MIRBuilder.buildUndef(DstReg);
+    return;
+  }
+
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   MRI->setRegClass(DstReg, TRI->getPointerRegClass());
   auto MIB =
       MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
 
-  Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent());
-  if (!Global)
-    return;
-
   unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
   LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace));
 
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index d6e8505..c3e0964 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -721,6 +721,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Allocate a new register for the remat.
   Register NewVReg = Edit->createFrom(Original);
 
+  // Constrain it to the register class of MI.
+  MRI.constrainRegClass(NewVReg, MRI.getRegClass(VirtReg.reg()));
+
   // Finally we can rematerialize OrigMI before MI.
   SlotIndex DefIdx =
       Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index dcf2df3..d57c5fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3131,12 +3131,16 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   if (TLI.useLoadStackGuardNode(M)) {
     Guard = getLoadStackGuard(DAG, dl, Chain);
   } else {
-    const Value *IRGuard = TLI.getSDagStackGuard(M);
-    SDValue GuardPtr = getValue(IRGuard);
-
-    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
-                        MachinePointerInfo(IRGuard, 0), Align,
-                        MachineMemOperand::MOVolatile);
+    if (const Value *IRGuard = TLI.getSDagStackGuard(M)) {
+      SDValue GuardPtr = getValue(IRGuard);
+      Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+                          MachinePointerInfo(IRGuard, 0), Align,
+                          MachineMemOperand::MOVolatile);
+    } else {
+      LLVMContext &Ctx = *DAG.getContext();
+      Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard"));
+      Guard = DAG.getPOISON(PtrMemTy);
+    }
   }
 
   // Perform the comparison via a getsetcc.
@@ -7324,6 +7328,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
       Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy);
     } else {
       const Value *Global = TLI.getSDagStackGuard(M);
+      if (!Global) {
+        LLVMContext &Ctx = *DAG.getContext();
+        Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard"));
+        setValue(&I, DAG.getPOISON(PtrTy));
+        return;
+      }
+
       Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
       Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
                         MachinePointerInfo(Global, 0), Align,
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index 407b6ad..1f53c87 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -43,8 +43,13 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
   for (const MDNode *MD : CBufMD->operands()) {
     assert(MD->getNumOperands() && "Invalid cbuffer metadata");
 
-    auto *Handle = cast<GlobalVariable>(
-        cast<ValueAsMetadata>(MD->getOperand(0))->getValue());
+    // For an unused cbuffer, the handle may have been optimized out
+    Metadata *OpMD = MD->getOperand(0);
+    if (!OpMD)
+      continue;
+
+    auto *Handle =
+        cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
     CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle);
 
     for (int I = 1, E = MD->getNumOperands(); I < E; ++I) {
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 0ecf47d..2715229 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,24 +132,70 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
   return std::move(SubPatterns);
 }
 
+static StringRef maxPlainSubstring(StringRef S) {
+  StringRef Best;
+  while (!S.empty()) {
+    size_t PrefixSize = S.find_first_of("?*[{\\");
+    if (PrefixSize == std::string::npos)
+      PrefixSize = S.size();
+
+    if (Best.size() < PrefixSize)
+      Best = S.take_front(PrefixSize);
+
+    S = S.drop_front(PrefixSize);
+
+    // It's impossible, as the first and last characters of the input string
+    // must be Glob special characters, otherwise they would be parts of
+    // the prefix or the suffix.
+    assert(!S.empty());
+
+    switch (S.front()) {
+    case '\\':
+      S = S.drop_front(2);
+      break;
+    case '[': {
+      // Drop '[' and the first character which can be ']'.
+      S = S.drop_front(2);
+      size_t EndBracket = S.find_first_of("]");
+      // Should not be possible, SubGlobPattern::create should fail on invalid
+      // pattern before we get here.
+      assert(EndBracket != std::string::npos);
+      S = S.drop_front(EndBracket + 1);
+      break;
+    }
+    case '{':
+      // TODO: implement.
+      // Fallback to whatever is best for now.
+      return Best;
+    default:
+      S = S.drop_front(1);
+    }
+  }
+
+  return Best;
+}
+
 Expected<GlobPattern>
 GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
   GlobPattern Pat;
+  Pat.Pattern = S;
 
   // Store the prefix that does not contain any metacharacter.
-  size_t PrefixSize = S.find_first_of("?*[{\\");
-  Pat.Prefix = S.substr(0, PrefixSize);
-  if (PrefixSize == std::string::npos)
+  Pat.PrefixSize = S.find_first_of("?*[{\\");
+  if (Pat.PrefixSize == std::string::npos) {
+    Pat.PrefixSize = S.size();
     return Pat;
-  S = S.substr(PrefixSize);
+  }
+  S = S.substr(Pat.PrefixSize);
 
   // Just in case we stop on unmatched opening brackets.
   size_t SuffixStart = S.find_last_of("?*[]{}\\");
   assert(SuffixStart != std::string::npos);
   if (S[SuffixStart] == '\\')
     ++SuffixStart;
-  ++SuffixStart;
-  Pat.Suffix = S.substr(SuffixStart);
+  if (SuffixStart < S.size())
+    ++SuffixStart;
+  Pat.SuffixSize = S.size() - SuffixStart;
   S = S.substr(0, SuffixStart);
 
   SmallVector<std::string, 1> SubPats;
@@ -199,10 +245,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
   return Pat;
 }
 
+StringRef GlobPattern::longest_substr() const {
+  return maxPlainSubstring(
+      Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+}
+
 bool GlobPattern::match(StringRef S) const {
-  if (!S.consume_front(Prefix))
+  if (!S.consume_front(prefix()))
     return false;
-  if (!S.consume_back(Suffix))
+  if (!S.consume_back(suffix()))
     return false;
   if (SubGlobs.empty() && S.empty())
     return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index ea32748..1c8383c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1430,6 +1430,18 @@ def FeatureAddSubU64Insts
 def FeatureMadU32Inst : SubtargetFeature<"mad-u32-inst", "HasMadU32Inst",
   "true", "Has v_mad_u32 instruction">;
 
+def FeatureAddMinMaxInsts : SubtargetFeature<"add-min-max-insts",
+  "HasAddMinMaxInsts",
+  "true",
+  "Has v_add_{min|max}_{i|u}32 instructions"
+>;
+
+def FeaturePkAddMinMaxInsts : SubtargetFeature<"pk-add-min-max-insts",
+  "HasPkAddMinMaxInsts",
+  "true",
+  "Has v_pk_add_{min|max}_{i|u}16 instructions"
+>;
+
 def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts",
   "HasVMemToLDSLoad",
   "true",
@@ -2115,6 +2127,8 @@ def FeatureISAVersion12_50 : FeatureSet<
   FeatureLshlAddU64Inst,
   FeatureAddSubU64Insts,
   FeatureMadU32Inst,
+  FeatureAddMinMaxInsts,
+  FeaturePkAddMinMaxInsts,
   FeatureLdsBarrierArriveAtomic,
   FeatureSetPrioIncWgInst,
   Feature45BitNumRecordsBufferResource,
@@ -2658,11 +2672,11 @@ def HasFmaakFmamkF64Insts :
 
 def HasAddMinMaxInsts :
   Predicate<"Subtarget->hasAddMinMaxInsts()">,
-  AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+  AssemblerPredicate<(any_of FeatureAddMinMaxInsts)>;
 
 def HasPkAddMinMaxInsts :
   Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
-  AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+  AssemblerPredicate<(any_of FeaturePkAddMinMaxInsts)>;
 
 def HasPkMinMax3Insts :
   Predicate<"Subtarget->hasPkMinMax3Insts()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56807a4..54ba2f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4835,6 +4835,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_perm_pk16_b4_u4:
     case Intrinsic::amdgcn_perm_pk16_b6_u4:
     case Intrinsic::amdgcn_perm_pk16_b8_u4:
+    case Intrinsic::amdgcn_add_max_i32:
+    case Intrinsic::amdgcn_add_max_u32:
+    case Intrinsic::amdgcn_add_min_i32:
+    case Intrinsic::amdgcn_add_min_u32:
+    case Intrinsic::amdgcn_pk_add_max_i16:
+    case Intrinsic::amdgcn_pk_add_max_u16:
+    case Intrinsic::amdgcn_pk_add_min_i16:
+    case Intrinsic::amdgcn_pk_add_min_u16:
       return getDefaultMappingVOP(MI);
     case Intrinsic::amdgcn_log:
     case Intrinsic::amdgcn_exp2:
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index a466780..ac660d5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -277,6 +277,8 @@ protected:
   bool HasLshlAddU64Inst = false;
   bool HasAddSubU64Insts = false;
   bool HasMadU32Inst = false;
+  bool HasAddMinMaxInsts = false;
+  bool HasPkAddMinMaxInsts = false;
   bool HasPointSampleAccel = false;
   bool HasLdsBarrierArriveAtomic = false;
   bool HasSetPrioIncWgInst = false;
@@ -1567,10 +1569,10 @@ public:
   bool hasIntMinMax64() const { return GFX1250Insts; }
 
   // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
-  bool hasAddMinMaxInsts() const { return GFX1250Insts; }
+  bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
 
   // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
-  bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
+  bool hasPkAddMinMaxInsts() const { return HasPkAddMinMaxInsts; }
 
   // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
   bool hasPkMinMax3Insts() const { return GFX1250Insts; }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 7cce033..ee10190 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
 let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
-  defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-  defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-  defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-  defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+  defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_i32>;
+  defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_u32>;
+  defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_i32>;
+  defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_u32>;
 }
 
 defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 6500fce..c4692b7 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -75,7 +75,7 @@ multiclass VOP3PInst<string OpName, VOPProfile P,
                      SDPatternOperator node = null_frag, bit IsDOT = 0> {
   def NAME : VOP3P_Pseudo<OpName, P,
                           !if (P.HasModifiers,
-                               getVOP3PModPat<P, node, IsDOT, IsDOT>.ret,
+                               getVOP3PModPat<P, node, !or(P.EnableClamp, IsDOT), IsDOT>.ret,
                                getVOP3Pat<P, node>.ret)>;
   let SubtargetPredicate = isGFX11Plus in {
     if P.HasExtVOP3DPP then
@@ -434,15 +434,16 @@ defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;
 } // End SubtargetPredicate = HasFmaMixBF16Insts
 
 def PK_ADD_MINMAX_Profile : VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16, VOP3_PACKED> {
-  let HasModifiers = 0;
+  let HasNeg = 0;
+  let EnableClamp = 1;
 }
 
 let isCommutable = 1, isReMaterializable = 1 in {
 let SubtargetPredicate = HasPkAddMinMaxInsts in {
-defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile>;
-defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile>;
-defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile>;
-defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile>;
+defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_i16>;
+defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_u16>;
+defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_i16>;
+defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_u16>;
 }
 let SubtargetPredicate = HasPkMinMax3Insts in {
 defm V_PK_MAX3_I16 : VOP3PInst<"v_pk_max3_i16", PK_ADD_MINMAX_Profile>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeaf..ca4a655 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2614,6 +2614,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                              Subtarget)))
     return Result;
+  // Try to widen vectors to gain more optimization opportunities.
+  if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
+    return NewShuffle;
   if ((Result =
            lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
     return Result;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index f7d1a09..b9c5b75 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -668,4 +668,38 @@ foreach vti = NoGroupBF16Vectors in {
   def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
             (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
 }
+
+let Predicates = [HasStdExtZvfbfa] in {
+  foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+    defvar fvti = fvtiToFWti.Vti;
+    defvar fwti = fvtiToFWti.Wti;
+    def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+                               (fvti.Vector fvti.RegClass:$rs1),
+                               (fvti.Mask VMV0:$vm),
+                               VLOpFrag)),
+              (!cast<Instruction>("PseudoVFWCVT_F_F_ALT_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
+                  (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                  (fvti.Mask VMV0:$vm),
+                  GPR:$vl, fvti.Log2SEW, TA_MA)>;
+
+    def : Pat<(fvti.Vector (any_riscv_fpround_vl
+                               (fwti.Vector fwti.RegClass:$rs1),
+                               (fwti.Mask VMV0:$vm), VLOpFrag)),
+              (!cast<Instruction>("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
+                  (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+                  (fwti.Mask VMV0:$vm),
+                  // Value to indicate no rounding mode change in
+                  // RISCVInsertReadWriteCSR
+                  FRM_DYN,
+                  GPR:$vl, fvti.Log2SEW, TA_MA)>;
+    def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+              (!cast<Instruction>("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW)
+                  (fvti.Vector (IMPLICIT_DEF)),
+                  fwti.RegClass:$rs1,
+                  // Value to indicate no rounding mode change in
+                  // RISCVInsertReadWriteCSR
+                  FRM_DYN,
+                  fvti.AVL, fvti.Log2SEW, TA_MA)>;
+  }
+}
 } // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 62a3c88..975a271 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -433,6 +433,8 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["fp8e5m3-insts"] = true;
     Features["permlane16-swap"] = true;
    Features["ashr-pk-insts"] = true;
+    Features["add-min-max-insts"] = true;
+    Features["pk-add-min-max-insts"] = true;
     Features["atomic-buffer-pk-add-bf16-inst"] = true;
     Features["vmem-pref-insts"] = true;
     Features["atomic-fadd-rtn-insts"] = true;
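Editor's note on the GlobPattern change above: the commit replaces the stored Prefix/Suffix strings with PrefixSize/SuffixSize offsets into the full Pattern and adds a longest_substr() accessor built on the new maxPlainSubstring() helper. Below is a minimal usage sketch, assuming longest_substr() is exposed from llvm/Support/GlobPattern.h exactly as defined in the diff and that GlobPattern::create can be called with its default sub-pattern limit; the standalone main(), the sample pattern, and the expected output are illustrative only and are not part of the commit.

// Sketch only: compile and link against LLVMSupport. The pattern string and
// the expected result are hypothetical examples, not taken from the commit.
#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // "lib" is the literal prefix and ".so" the literal suffix; between them,
  // the longest run free of glob metacharacters is "Support".
  Expected<GlobPattern> Pat = GlobPattern::create("lib*Support*[A-Z]?.so");
  if (!Pat) {
    errs() << toString(Pat.takeError()) << "\n";
    return 1;
  }
  outs() << Pat->longest_substr() << "\n"; // expected to print "Support"
  return 0;
}

Such a longest literal substring can serve as a cheap pre-filter (for example, a plain substring search) before running the full glob match.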