diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600Subtarget.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 66 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 39 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 8 |
9 files changed, 91 insertions, 50 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 54ba2f8..dbca5af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -5081,17 +5081,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned MinNumRegsRequired = DstSize / 32; const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired); + OpdsMapping[0] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[4] = - Info->getMinNumAGPRs() >= MinNumRegsRequired - ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) - : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI) + : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); OpdsMapping[8] = getVGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI); OpdsMapping[10] = getVGPROpMapping(MI.getOperand(10).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index 0ea9add..b03d50f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -261,13 +261,6 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage( const Function *Callee = getCalleeFunction(*CalleeOp); - // Avoid crashing on undefined behavior with an illegal call to a - // kernel. If a callsite's calling convention doesn't match the - // function's, it's undefined behavior. If the callsite calling - // convention does match, that would have errored earlier. - if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv())) - report_fatal_error("invalid call to entry function"); - auto isSameFunction = [](const MachineFunction &MF, const Function *F) { return F == &MF.getFunction(); }; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f377b8a..da4bd87 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1040,6 +1040,8 @@ public: return true; } + bool enableTerminalRule() const override { return true; } + bool useAA() const override; bool enableSubRegLiveness() const override { diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 3e256cc..0104085 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm; #include "R600GenInstrInfo.inc" R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) - : R600GenInstrInfo(ST, -1, -1), RI(), ST(ST) {} + : R600GenInstrInfo(ST, RI, -1, -1), RI(), ST(ST) {} bool R600InstrInfo::isVector(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index 22e56b6..efd99db 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -126,6 +126,8 @@ public: return true; } + bool enableTerminalRule() const override { return true; } + bool enableSubRegLiveness() const override { return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9c74c65..6b397e0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -63,7 +63,8 @@ static cl::opt<bool> Fix16BitCopies( cl::ReallyHidden); SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) - : AMDGPUGenInstrInfo(ST, AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), + : AMDGPUGenInstrInfo(ST, RI, AMDGPU::ADJCALLSTACKUP, + AMDGPU::ADJCALLSTACKDOWN), RI(ST), ST(ST) { SchedModel.init(&ST); } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f0d1117..00aae2c9 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -233,10 +233,11 @@ private: void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, - AMDGPU::OpName OpName, Register DestReg) const; + const DebugLoc &DL, AMDGPU::OpName OpName, + Register DestReg) const; Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, - AMDGPU::OpName OpName) const; + const DebugLoc &DL, AMDGPU::OpName OpName) const; unsigned read2Opcode(unsigned EltSize) const; unsigned read2ST64Opcode(unsigned EltSize) const; @@ -1367,10 +1368,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, // Paired. void SILoadStoreOptimizer::copyToDestRegs( CombineInfo &CI, CombineInfo &Paired, - MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName, - Register DestReg) const { + MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, + AMDGPU::OpName OpName, Register DestReg) const { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired); @@ -1398,9 +1398,9 @@ void SILoadStoreOptimizer::copyToDestRegs( Register SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, AMDGPU::OpName OpName) const { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired); @@ -1456,7 +1456,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); Register DestReg = MRI->createVirtualRegister(SuperRC); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); @@ -1484,7 +1485,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, .addImm(0) // gds .cloneMergedMemRefs({&*CI.I, &*Paired.I}); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1541,7 +1542,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( (NewOffset0 != NewOffset1) && "Computed offset doesn't fit"); const MCInstrDesc &Write2Desc = TII->get(Opc); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); @@ -1582,7 +1584,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); + const unsigned Opcode = getNewOpcode(CI, Paired); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); @@ -1607,7 +1611,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1618,7 +1622,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); + const unsigned Opcode = getNewOpcode(CI, Paired); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); @@ -1639,7 +1645,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( New.addImm(MergedOffset); New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1650,7 +1656,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1680,7 +1688,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1691,7 +1699,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1731,7 +1741,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1742,12 +1752,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); @@ -1789,7 +1800,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); @@ -1807,7 +1820,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( .addImm(CI.CPol) .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); - copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg); + copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1818,12 +1831,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) @@ -2094,12 +2109,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); + DebugLoc DL = + DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc()); const unsigned Opcode = getNewOpcode(CI, Paired); Register SrcReg = - copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata); + copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata); auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 54f57e0..85adcab 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -513,6 +513,13 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16", defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>; + +let HasClamp = 0, HasOMod = 0 in { +def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>; +def V_TRANS_BF16_t16_Profile : VOPProfile_True16 <VOP_BF16_BF16>; +def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 <VOP_BF16_BF16>; +} + let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; @@ -527,14 +534,30 @@ defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>; } let SubtargetPredicate = HasBF16TransInsts in { -defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>; -defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>; -defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>; -defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>; -defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>; -defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>; -defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>; -defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>; +defm V_TANH_BF16 : VOP1Inst_t16_with_profiles<"v_tanh_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + int_amdgcn_tanh>; +defm V_RCP_BF16 : VOP1Inst_t16_with_profiles<"v_rcp_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUrcp>; +defm V_SQRT_BF16 : VOP1Inst_t16_with_profiles<"v_sqrt_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + any_amdgcn_sqrt>; +defm V_RSQ_BF16 : VOP1Inst_t16_with_profiles<"v_rsq_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUrsq>; +defm V_LOG_BF16 : VOP1Inst_t16_with_profiles<"v_log_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUlogf16>; +defm V_EXP_BF16 : VOP1Inst_t16_with_profiles<"v_exp_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUexpf16>; +defm V_SIN_BF16 : VOP1Inst_t16_with_profiles<"v_sin_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUsin>; +defm V_COS_BF16 : VOP1Inst_t16_with_profiles<"v_cos_bf16", V_TRANS_BF16_Profile, + V_TRANS_BF16_t16_Profile, V_TRANS_BF16_fake16_Profile, + AMDGPUcos>; } } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 8325c62..6df91a1 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1357,8 +1357,12 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> : class getVOP3ModPat<VOPProfile P, SDPatternOperator node> { dag src0 = !if(P.HasOMod, - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$omod)), + !if(P.HasClamp, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers))); list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), |
