diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 18 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 13 |
6 files changed, 40 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ea32748..1c8383c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1430,6 +1430,18 @@ def FeatureAddSubU64Insts def FeatureMadU32Inst : SubtargetFeature<"mad-u32-inst", "HasMadU32Inst", "true", "Has v_mad_u32 instruction">; +def FeatureAddMinMaxInsts : SubtargetFeature<"add-min-max-insts", + "HasAddMinMaxInsts", + "true", + "Has v_add_{min|max}_{i|u}32 instructions" +>; + +def FeaturePkAddMinMaxInsts : SubtargetFeature<"pk-add-min-max-insts", + "HasPkAddMinMaxInsts", + "true", + "Has v_pk_add_{min|max}_{i|u}16 instructions" +>; + def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts", "HasVMemToLDSLoad", "true", @@ -2115,6 +2127,8 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureLshlAddU64Inst, FeatureAddSubU64Insts, FeatureMadU32Inst, + FeatureAddMinMaxInsts, + FeaturePkAddMinMaxInsts, FeatureLdsBarrierArriveAtomic, FeatureSetPrioIncWgInst, Feature45BitNumRecordsBufferResource, @@ -2658,11 +2672,11 @@ def HasFmaakFmamkF64Insts : def HasAddMinMaxInsts : Predicate<"Subtarget->hasAddMinMaxInsts()">, - AssemblerPredicate<(any_of FeatureGFX1250Insts)>; + AssemblerPredicate<(any_of FeatureAddMinMaxInsts)>; def HasPkAddMinMaxInsts : Predicate<"Subtarget->hasPkAddMinMaxInsts()">, - AssemblerPredicate<(any_of FeatureGFX1250Insts)>; + AssemblerPredicate<(any_of FeaturePkAddMinMaxInsts)>; def HasPkMinMax3Insts : Predicate<"Subtarget->hasPkMinMax3Insts()">, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 56807a4..54ba2f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4835,6 +4835,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_perm_pk16_b4_u4: case Intrinsic::amdgcn_perm_pk16_b6_u4: case Intrinsic::amdgcn_perm_pk16_b8_u4: + case Intrinsic::amdgcn_add_max_i32: + case Intrinsic::amdgcn_add_max_u32: + case Intrinsic::amdgcn_add_min_i32: + case Intrinsic::amdgcn_add_min_u32: + case Intrinsic::amdgcn_pk_add_max_i16: + case Intrinsic::amdgcn_pk_add_max_u16: + case Intrinsic::amdgcn_pk_add_min_i16: + case Intrinsic::amdgcn_pk_add_min_u16: return getDefaultMappingVOP(MI); case Intrinsic::amdgcn_log: case Intrinsic::amdgcn_exp2: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 996b55f..02c5390 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2086,7 +2086,7 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy)); - addPass(AtomicExpandPass(&TM)); + addPass(AtomicExpandPass(TM)); if (TM.getOptLevel() > CodeGenOptLevel::None) { addPass(AMDGPUPromoteAllocaPass(TM)); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index a466780..ac660d5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -277,6 +277,8 @@ protected: bool HasLshlAddU64Inst = false; bool HasAddSubU64Insts = false; bool HasMadU32Inst = false; + bool HasAddMinMaxInsts = false; + bool HasPkAddMinMaxInsts = false; bool HasPointSampleAccel = false; bool HasLdsBarrierArriveAtomic = false; bool HasSetPrioIncWgInst = false; @@ -1567,10 +1569,10 @@ public: bool hasIntMinMax64() const { return GFX1250Insts; } // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions. - bool hasAddMinMaxInsts() const { return GFX1250Insts; } + bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; } // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions. - bool hasPkAddMinMaxInsts() const { return GFX1250Insts; } + bool hasPkAddMinMaxInsts() const { return HasPkAddMinMaxInsts; } // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions. bool hasPkMinMax3Insts() const { return GFX1250Insts; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 7cce033..ee10190 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in { - defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; - defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; - defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; - defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_i32>; + defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_u32>; + defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_i32>; + defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_u32>; } defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 6500fce..c4692b7 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -75,7 +75,7 @@ multiclass VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit IsDOT = 0> { def NAME : VOP3P_Pseudo<OpName, P, !if (P.HasModifiers, - getVOP3PModPat<P, node, IsDOT, IsDOT>.ret, + getVOP3PModPat<P, node, !or(P.EnableClamp, IsDOT), IsDOT>.ret, getVOP3Pat<P, node>.ret)>; let SubtargetPredicate = isGFX11Plus in { if P.HasExtVOP3DPP then @@ -434,15 +434,16 @@ defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>; } // End SubtargetPredicate = HasFmaMixBF16Insts def PK_ADD_MINMAX_Profile : VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16, VOP3_PACKED> { - let HasModifiers = 0; + let HasNeg = 0; + let EnableClamp = 1; } let isCommutable = 1, isReMaterializable = 1 in { let SubtargetPredicate = HasPkAddMinMaxInsts in { -defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile>; -defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile>; -defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile>; -defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile>; +defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_i16>; +defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_u16>; +defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_i16>; +defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_u16>; } let SubtargetPredicate = HasPkMinMax3Insts in { defm V_PK_MAX3_I16 : VOP3PInst<"v_pk_max3_i16", PK_ADD_MINMAX_Profile>; |