Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 23
-rw-r--r--  llvm/lib/Frontend/HLSL/CBuffer.cpp | 9
-rw-r--r--  llvm/lib/Support/GlobPattern.cpp | 67
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td | 18
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 13
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 34
-rw-r--r--  llvm/lib/TargetParser/TargetParser.cpp | 2
13 files changed, 172 insertions, 34 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 1fe38d6..b49040b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1862,15 +1862,19 @@ bool IRTranslator::translateVectorDeinterleave2Intrinsic(
void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
+ Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent());
+ if (!Global) {
+ LLVMContext &Ctx = MIRBuilder.getContext();
+ Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard"));
+ MIRBuilder.buildUndef(DstReg);
+ return;
+ }
+
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MRI->setRegClass(DstReg, TRI->getPointerRegClass());
auto MIB =
MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
- Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent());
- if (!Global)
- return;
-
unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace));
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index d6e8505..c3e0964 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -721,6 +721,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Allocate a new register for the remat.
Register NewVReg = Edit->createFrom(Original);
+ // Constrain it to the register class of VirtReg, which it replaces in MI.
+ MRI.constrainRegClass(NewVReg, MRI.getRegClass(VirtReg.reg()));
+
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx =
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index dcf2df3..d57c5fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3131,12 +3131,16 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
if (TLI.useLoadStackGuardNode(M)) {
Guard = getLoadStackGuard(DAG, dl, Chain);
} else {
- const Value *IRGuard = TLI.getSDagStackGuard(M);
- SDValue GuardPtr = getValue(IRGuard);
-
- Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
- MachinePointerInfo(IRGuard, 0), Align,
- MachineMemOperand::MOVolatile);
+ if (const Value *IRGuard = TLI.getSDagStackGuard(M)) {
+ SDValue GuardPtr = getValue(IRGuard);
+ Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+ MachinePointerInfo(IRGuard, 0), Align,
+ MachineMemOperand::MOVolatile);
+ } else {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard"));
+ Guard = DAG.getPOISON(PtrMemTy);
+ }
}
// Perform the comparison via a getsetcc.
@@ -7324,6 +7328,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy);
} else {
const Value *Global = TLI.getSDagStackGuard(M);
+ if (!Global) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard"));
+ setValue(&I, DAG.getPOISON(PtrTy));
+ return;
+ }
+
Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index 407b6ad..1f53c87 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -43,8 +43,13 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
for (const MDNode *MD : CBufMD->operands()) {
assert(MD->getNumOperands() && "Invalid cbuffer metadata");
- auto *Handle = cast<GlobalVariable>(
- cast<ValueAsMetadata>(MD->getOperand(0))->getValue());
+ // For an unused cbuffer, the handle may have been optimized out
+ Metadata *OpMD = MD->getOperand(0);
+ if (!OpMD)
+ continue;
+
+ auto *Handle =
+ cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle);
for (int I = 1, E = MD->getNumOperands(); I < E; ++I) {
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 0ecf47d..2715229 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,24 +132,70 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
+static StringRef maxPlainSubstring(StringRef S) {
+ StringRef Best;
+ while (!S.empty()) {
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ if (PrefixSize == std::string::npos)
+ PrefixSize = S.size();
+
+ if (Best.size() < PrefixSize)
+ Best = S.take_front(PrefixSize);
+
+ S = S.drop_front(PrefixSize);
+
+ // S cannot be empty here: the first and last characters of the input string
+ // must be glob special characters, otherwise they would have been part of
+ // the prefix or the suffix.
+ assert(!S.empty());
+
+ switch (S.front()) {
+ case '\\':
+ S = S.drop_front(2);
+ break;
+ case '[': {
+ // Drop '[' and the first character, which may be a literal ']'.
+ S = S.drop_front(2);
+ size_t EndBracket = S.find_first_of("]");
+ // A missing ']' should not be possible: SubGlobPattern::create fails on an
+ // invalid pattern before we get here.
+ assert(EndBracket != std::string::npos);
+ S = S.drop_front(EndBracket + 1);
+ break;
+ }
+ case '{':
+ // TODO: implement.
+ // Fall back to whatever is best for now.
+ return Best;
+ default:
+ S = S.drop_front(1);
+ }
+ }
+
+ return Best;
+}
+
Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
+ Pat.Pattern = S;
// Store the prefix that does not contain any metacharacter.
- size_t PrefixSize = S.find_first_of("?*[{\\");
- Pat.Prefix = S.substr(0, PrefixSize);
- if (PrefixSize == std::string::npos)
+ Pat.PrefixSize = S.find_first_of("?*[{\\");
+ if (Pat.PrefixSize == std::string::npos) {
+ Pat.PrefixSize = S.size();
return Pat;
- S = S.substr(PrefixSize);
+ }
+ S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
size_t SuffixStart = S.find_last_of("?*[]{}\\");
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
- ++SuffixStart;
- Pat.Suffix = S.substr(SuffixStart);
+ if (SuffixStart < S.size())
+ ++SuffixStart;
+ Pat.SuffixSize = S.size() - SuffixStart;
S = S.substr(0, SuffixStart);
SmallVector<std::string, 1> SubPats;
@@ -199,10 +245,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}
+StringRef GlobPattern::longest_substr() const {
+ return maxPlainSubstring(
+ Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+}
+
bool GlobPattern::match(StringRef S) const {
- if (!S.consume_front(Prefix))
+ if (!S.consume_front(prefix()))
return false;
- if (!S.consume_back(Suffix))
+ if (!S.consume_back(suffix()))
return false;
if (SubGlobs.empty() && S.empty())
return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index ea32748..1c8383c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1430,6 +1430,18 @@ def FeatureAddSubU64Insts
def FeatureMadU32Inst : SubtargetFeature<"mad-u32-inst", "HasMadU32Inst",
"true", "Has v_mad_u32 instruction">;
+def FeatureAddMinMaxInsts : SubtargetFeature<"add-min-max-insts",
+ "HasAddMinMaxInsts",
+ "true",
+ "Has v_add_{min|max}_{i|u}32 instructions"
+>;
+
+def FeaturePkAddMinMaxInsts : SubtargetFeature<"pk-add-min-max-insts",
+ "HasPkAddMinMaxInsts",
+ "true",
+ "Has v_pk_add_{min|max}_{i|u}16 instructions"
+>;
+
def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts",
"HasVMemToLDSLoad",
"true",
@@ -2115,6 +2127,8 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureLshlAddU64Inst,
FeatureAddSubU64Insts,
FeatureMadU32Inst,
+ FeatureAddMinMaxInsts,
+ FeaturePkAddMinMaxInsts,
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
Feature45BitNumRecordsBufferResource,
@@ -2658,11 +2672,11 @@ def HasFmaakFmamkF64Insts :
def HasAddMinMaxInsts :
Predicate<"Subtarget->hasAddMinMaxInsts()">,
- AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+ AssemblerPredicate<(any_of FeatureAddMinMaxInsts)>;
def HasPkAddMinMaxInsts :
Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
- AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+ AssemblerPredicate<(any_of FeaturePkAddMinMaxInsts)>;
def HasPkMinMax3Insts :
Predicate<"Subtarget->hasPkMinMax3Insts()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56807a4..54ba2f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4835,6 +4835,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_perm_pk16_b4_u4:
case Intrinsic::amdgcn_perm_pk16_b6_u4:
case Intrinsic::amdgcn_perm_pk16_b8_u4:
+ case Intrinsic::amdgcn_add_max_i32:
+ case Intrinsic::amdgcn_add_max_u32:
+ case Intrinsic::amdgcn_add_min_i32:
+ case Intrinsic::amdgcn_add_min_u32:
+ case Intrinsic::amdgcn_pk_add_max_i16:
+ case Intrinsic::amdgcn_pk_add_max_u16:
+ case Intrinsic::amdgcn_pk_add_min_i16:
+ case Intrinsic::amdgcn_pk_add_min_u16:
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_log:
case Intrinsic::amdgcn_exp2:
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index a466780..ac660d5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -277,6 +277,8 @@ protected:
bool HasLshlAddU64Inst = false;
bool HasAddSubU64Insts = false;
bool HasMadU32Inst = false;
+ bool HasAddMinMaxInsts = false;
+ bool HasPkAddMinMaxInsts = false;
bool HasPointSampleAccel = false;
bool HasLdsBarrierArriveAtomic = false;
bool HasSetPrioIncWgInst = false;
@@ -1567,10 +1569,10 @@ public:
bool hasIntMinMax64() const { return GFX1250Insts; }
// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
- bool hasAddMinMaxInsts() const { return GFX1250Insts; }
+ bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
- bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
+ bool hasPkAddMinMaxInsts() const { return HasPkAddMinMaxInsts; }
// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
bool hasPkMinMax3Insts() const { return GFX1250Insts; }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 7cce033..ee10190 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
- defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
- defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
- defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
- defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_i32>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_u32>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_i32>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_u32>;
}
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 6500fce..c4692b7 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -75,7 +75,7 @@ multiclass VOP3PInst<string OpName, VOPProfile P,
SDPatternOperator node = null_frag, bit IsDOT = 0> {
def NAME : VOP3P_Pseudo<OpName, P,
!if (P.HasModifiers,
- getVOP3PModPat<P, node, IsDOT, IsDOT>.ret,
+ getVOP3PModPat<P, node, !or(P.EnableClamp, IsDOT), IsDOT>.ret,
getVOP3Pat<P, node>.ret)>;
let SubtargetPredicate = isGFX11Plus in {
if P.HasExtVOP3DPP then
@@ -434,15 +434,16 @@ defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;
} // End SubtargetPredicate = HasFmaMixBF16Insts
def PK_ADD_MINMAX_Profile : VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16, VOP3_PACKED> {
- let HasModifiers = 0;
+ let HasNeg = 0;
+ let EnableClamp = 1;
}
let isCommutable = 1, isReMaterializable = 1 in {
let SubtargetPredicate = HasPkAddMinMaxInsts in {
-defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile>;
-defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile>;
-defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile>;
-defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile>;
+defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_i16>;
+defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_u16>;
+defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_i16>;
+defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_u16>;
}
let SubtargetPredicate = HasPkMinMax3Insts in {
defm V_PK_MAX3_I16 : VOP3PInst<"v_pk_max3_i16", PK_ADD_MINMAX_Profile>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeaf..ca4a655 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2614,6 +2614,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
+ // Try to widen vectors to gain more optimization opportunities.
+ if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
+ return NewShuffle;
if ((Result =
lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index f7d1a09..b9c5b75 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -668,4 +668,38 @@ foreach vti = NoGroupBF16Vectors in {
def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
(vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
}
+
+let Predicates = [HasStdExtZvfbfa] in {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+ (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFWCVT_F_F_ALT_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+
+ def : Pat<(fvti.Vector (any_riscv_fpround_vl
+ (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask VMV0:$vm), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask VMV0:$vm),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+ (!cast<Instruction>("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fwti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+ }
+}
} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 62a3c88..975a271 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -433,6 +433,8 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["fp8e5m3-insts"] = true;
Features["permlane16-swap"] = true;
Features["ashr-pk-insts"] = true;
+ Features["add-min-max-insts"] = true;
+ Features["pk-add-min-max-insts"] = true;
Features["atomic-buffer-pk-add-bf16-inst"] = true;
Features["vmem-pref-insts"] = true;
Features["atomic-fadd-rtn-insts"] = true;