Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 14
-rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 12
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 64
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 6
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 3
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 3
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 6
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 6
13 files changed, 57 insertions, 77 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870..7003a40 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()
 // only allow 32-bit registers in operands and use low halves thereof.
 def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
   AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
+def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">,
+  AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>;
 def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
                                               "!Subtarget->useRealTrue16Insts()">,
   AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 38718c4..7504f1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -150,7 +150,10 @@ public:
       if (!CVisited.insert(CII).second)
        continue;
 
-      if (CII->getParent() == II->getParent() && !IsLookThru(II))
+      // Same-BB filter must look at the *user*; and allow non-lookthrough
+      // users when the def is a PHI (loop-header pattern).
+      if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
+          !isa<PHINode>(II))
        continue;
 
       if (isOpLegal(CII))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index d9bfeae..0a59132 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2562,7 +2562,9 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
   for (Function *F : NeedsPostProcess)
     Splitter.processFunction(*F);
   for (Function *F : Intrinsics) {
-    if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
+    // use_empty() can also occur with cases like masked load, which will
+    // have been rewritten out of the module by now but not erased.
+    if (F->use_empty() || isRemovablePointerIntrinsic(F->getIntrinsicID())) {
       F->eraseFromParent();
     } else {
       std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 0776d14..f413bbc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -840,7 +840,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
   // clang-format on
 
-  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, StandardB)
+  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
+                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
+                   StandardB)
       .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
       .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
       .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index f2e432f..b2ff5a1 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -969,10 +969,9 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
   }
 
   let OtherPredicates = [NotLDSRequiresM0Init] in {
-    foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-    let True16Predicate = p in {
-      def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
-    }
+    let True16Predicate = NotUseRealTrue16Insts in {
+      def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+    }
     let True16Predicate = UseRealTrue16Insts in {
       def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
     }
@@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
   }
 
   let OtherPredicates = [NotLDSRequiresM0Init] in {
-    foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-    let True16Predicate = p in {
-      def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
-    }
+    let True16Predicate = NotUseRealTrue16Insts in {
+      def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+    }
     let True16Predicate = UseRealTrue16Insts in {
       def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
     }
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9f33bac..5a22b23 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1982,8 +1982,7 @@ defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
 defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
 defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
 defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
 defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
@@ -2127,8 +2126,7 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
 defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
 defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
 defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
 defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
@@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
+let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in {
 defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
@@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
 defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
 defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
 defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
 defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 31a2d55..c2252af 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1006,9 +1006,8 @@ public:
            Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
            Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
            Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
-           Opcode == AMDGPU::S_BARRIER_LEAVE ||
-           Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
-           Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
+           Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
+           Opcode == AMDGPU::DS_GWS_BARRIER;
   }
 
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 59fd2f1..be084a9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat <
 def : VOPSelectModsPat <i32>;
 def : VOPSelectModsPat <f32>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : VOPSelectPat <f16>;
 def : VOPSelectPat <i16>;
 } // End True16Predicate = p
@@ -2137,8 +2136,7 @@ def : GCNPat <
 >;
 
 foreach fp16vt = [f16, bf16] in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let SubtargetPredicate = p in {
+let SubtargetPredicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (fabs (fp16vt VGPR_32:$src)),
   (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2230,8 +2228,7 @@ def : GCNPat <
 }
 
 foreach fp16vt = [f16, bf16] in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (fcopysign fp16vt:$src0, fp16vt:$src1),
   (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -2354,23 +2351,21 @@ def : GCNPat <
   (S_MOV_B32 $ga)
 >;
 
-foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
-  let True16Predicate = pred in {
-    def : GCNPat <
-      (VGPRImm<(i16 imm)>:$imm),
-      (V_MOV_B32_e32 imm:$imm)
-    >;
+let True16Predicate = NotUseRealTrue16Insts in {
+  def : GCNPat <
+    (VGPRImm<(i16 imm)>:$imm),
+    (V_MOV_B32_e32 imm:$imm)
+  >;
 
-    // FIXME: Workaround for ordering issue with peephole optimizer where
-    // a register class copy interferes with immediate folding. Should
-    // use s_mov_b32, which can be shrunk to s_movk_i32
+  // FIXME: Workaround for ordering issue with peephole optimizer where
+  // a register class copy interferes with immediate folding. Should
+  // use s_mov_b32, which can be shrunk to s_movk_i32
 
-    foreach vt = [f16, bf16] in {
-      def : GCNPat <
-        (VGPRImm<(vt fpimm)>:$imm),
-        (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
-      >;
-    }
+  foreach vt = [f16, bf16] in {
+    def : GCNPat <
+      (VGPRImm<(vt fpimm)>:$imm),
+      (V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
+    >;
   }
 }
 
@@ -2859,8 +2854,7 @@ def : GCNPat<
   (i32 (DivergentSextInreg<i1> i32:$src)),
   (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (i16 (DivergentSextInreg<i1> i16:$src)),
   (V_BFE_I32_e64 $src, (i32 0), (i32 1))
@@ -3205,8 +3199,7 @@ def : GCNPat<
 }
 } // AddedComplexity = 1
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat<
   (i32 (DivergentUnaryFrag<zext> i16:$src)),
   (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3416,8 +3409,7 @@ def : GCNPat <
 // Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
 // The 12s emit 0s.
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (i16 (bswap i16:$a)),
   (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
@@ -3670,8 +3662,7 @@ def : GCNPat <
   (S_LSHL_B32 SReg_32:$src1, (i16 16))
 >;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
   (v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
@@ -3707,8 +3698,7 @@ def : GCNPat <
   (COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
 >;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
   (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
@@ -3735,8 +3725,7 @@ def : GCNPat <
 >;
 
 let SubtargetPredicate = HasVOP3PInsts in {
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
 def : GCNPat <
   (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
   (v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3766,8 +3755,7 @@ def : GCNPat <
   (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
 >;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 // Take the lower 16 bits from each VGPR_32 and concat them
 def : GCNPat <
   (vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
@@ -3838,8 +3826,7 @@ def : GCNPat <
 >;
 
 // Take the upper 16 bits from each VGPR_32 and concat them
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
 def : GCNPat <
   (vecTy (DivergentBinFrag<build_vector>
     (Ty !if(!eq(Ty, i16),
@@ -3881,8 +3868,7 @@ def : GCNPat <
   (v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
 >;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (v2f16 (scalar_to_vector f16:$src0)),
   (COPY $src0)
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 296ce5a..b3fd8c7 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1616,7 +1616,8 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm
   let isConvergent = 1;
 }
 
-def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
+def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave",
+  (ins), "", [(int_amdgcn_s_barrier_leave (i16 srcvalue))] > {
   let SchedRW = [WriteBarrier];
   let simm16 = 0;
   let fixed_imm = 1;
@@ -1624,9 +1625,6 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
   let Defs = [SCC];
 }
 
-def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave",
-  (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>;
-
 def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
   let SubtargetPredicate = isGFX8Plus;
   let simm16 = 0;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 6230c17..77df721 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1561,8 +1561,7 @@ def : GCNPat <
 
 } // End OtherPredicates = [isGFX8Plus]
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let OtherPredicates = [isGFX8Plus, p] in {
+let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in {
 def : GCNPat<
   (i32 (anyext i16:$src)),
   (COPY $src)
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 37d92bc..30dab55 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
       $src)
 >;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in {
+let True16Predicate = NotUseRealTrue16Insts in {
 def : GCNPat <
   (and i16:$src0, i16:$src1),
   (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index e6a7c35..4a2b54d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in {
 } // End SchedRW = [Write64Bit]
 } // End isReMaterializable = 1
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
 def : GCNPat<
   (i32 (DivergentUnaryFrag<sext> i16:$src)),
   (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
@@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
 
 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
 def : GCNPat<
   (i64 (DivergentUnaryFrag<sext> i16:$src)),
   (REG_SEQUENCE VReg_64,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 52ee1e8..5daf860 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi
 
 defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32>;
 
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
 defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
 let True16Predicate = UseRealTrue16Insts in
 defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_F16_t16>;
@@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro
 } // End isCommutable = 1
 
 defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32_BF16, bf16>;
-foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-let True16Predicate = p in
+let True16Predicate = NotUseRealTrue16Insts in
 defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>;
 let True16Predicate = UseRealTrue16Insts in
 defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;