diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 51 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 |
12 files changed, 40 insertions, 75 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index d81c188..537d3a4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -271,11 +271,8 @@ def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>; // FIXME: Check MMO is atomic def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap_glue>; def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap_glue>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, SIatomic_fmin>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, SIatomic_fmax>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, atomic_load_fmin_glue>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, atomic_load_fmax_glue>; - +def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin_glue>; +def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax_glue>; def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SWAP, SIbuffer_atomic_swap>; def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_ADD, SIbuffer_atomic_add>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b50c0cc..d60c62a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -502,9 +502,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { // isa<MemSDNode> almost works but is slightly too permissive for some DS // intrinsics. - if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) || - Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || - Opc == AMDGPUISD::ATOMIC_LOAD_FMAX) { + if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N)) { N = glueCopyToM0LDSInit(N); SelectCode(N); return; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 519e623..522b3a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -5524,8 +5524,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) NODE_NAME_CASE(DS_ORDERED_COUNT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) - NODE_NAME_CASE(ATOMIC_LOAD_FMIN) - NODE_NAME_CASE(ATOMIC_LOAD_FMAX) NODE_NAME_CASE(BUFFER_LOAD) NODE_NAME_CASE(BUFFER_LOAD_UBYTE) NODE_NAME_CASE(BUFFER_LOAD_USHORT) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 206bb46..37572af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -575,8 +575,6 @@ enum NodeType : unsigned { TBUFFER_LOAD_FORMAT_D16, DS_ORDERED_COUNT, ATOMIC_CMP_SWAP, - ATOMIC_LOAD_FMIN, - ATOMIC_LOAD_FMAX, BUFFER_LOAD, BUFFER_LOAD_UBYTE, BUFFER_LOAD_USHORT, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index ae3f2b8..03e2d62 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3620,8 +3620,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_ATOMICRMW_UINC_WRAP: case TargetOpcode::G_ATOMICRMW_UDEC_WRAP: case TargetOpcode::G_ATOMICRMW_FADD: - case AMDGPU::G_AMDGPU_ATOMIC_FMIN: - case AMDGPU::G_AMDGPU_ATOMIC_FMAX: + case TargetOpcode::G_ATOMICRMW_FMIN: + case TargetOpcode::G_ATOMICRMW_FMAX: return selectG_LOAD_STORE_ATOMICRMW(I); case TargetOpcode::G_SELECT: return selectG_SELECT(I); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index ebc6402..21f541d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -685,6 +685,8 @@ defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>; defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>; defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>; defm atomic_load_fadd : binary_atomic_op_fp_all_as<atomic_load_fadd>; +defm atomic_load_fmin : binary_atomic_op_fp_all_as<atomic_load_fmin>; +defm atomic_load_fmax : binary_atomic_op_fp_all_as<atomic_load_fmax>; defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>; defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>; defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0c7b196..4ff945e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -283,7 +283,9 @@ static const LLT S1 = LLT::scalar(1); static const LLT S8 = LLT::scalar(8); static const LLT S16 = LLT::scalar(16); static const LLT S32 = LLT::scalar(32); +static const LLT F32 = LLT::float32(); static const LLT S64 = LLT::scalar(64); +static const LLT F64 = LLT::float64(); static const LLT S96 = LLT::scalar(96); static const LLT S128 = LLT::scalar(128); static const LLT S160 = LLT::scalar(160); @@ -1648,6 +1650,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasFlatAtomicFaddF32Inst()) Atomic.legalFor({{S32, FlatPtr}}); + getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) + .legalFor({{F32, LocalPtr}, {F64, LocalPtr}}); + if (ST.hasGFX90AInsts()) { // These are legal with some caveats, and should have undergone expansion in // the IR in most situations @@ -5401,9 +5406,9 @@ static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { case Intrinsic::amdgcn_ds_fadd: return AMDGPU::G_ATOMICRMW_FADD; case Intrinsic::amdgcn_ds_fmin: - return AMDGPU::G_AMDGPU_ATOMIC_FMIN; + return AMDGPU::G_ATOMICRMW_FMIN; case Intrinsic::amdgcn_ds_fmax: - return AMDGPU::G_AMDGPU_ATOMIC_FMAX; + return AMDGPU::G_ATOMICRMW_FMAX; default: llvm_unreachable("not a DS FP intrinsic"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 313d53a..0510a1d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -5219,11 +5219,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_ATOMICRMW_UMAX: case AMDGPU::G_ATOMICRMW_UMIN: case AMDGPU::G_ATOMICRMW_FADD: + case AMDGPU::G_ATOMICRMW_FMIN: + case AMDGPU::G_ATOMICRMW_FMAX: case AMDGPU::G_ATOMICRMW_UINC_WRAP: case AMDGPU::G_ATOMICRMW_UDEC_WRAP: - case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: - case AMDGPU::G_AMDGPU_ATOMIC_FMIN: - case AMDGPU::G_AMDGPU_ATOMIC_FMAX: { + case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: { OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg()); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c436e03..c607437 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -945,6 +945,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX, ISD::ATOMIC_LOAD_FADD, + ISD::ATOMIC_LOAD_FMIN, + ISD::ATOMIC_LOAD_FMAX, ISD::ATOMIC_LOAD_UINC_WRAP, ISD::ATOMIC_LOAD_UDEC_WRAP, ISD::INTRINSIC_VOID, @@ -8707,25 +8709,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { MemSDNode *M = cast<MemSDNode>(Op); - unsigned Opc; - switch (IntrID) { - case Intrinsic::amdgcn_ds_fmin: - Opc = AMDGPUISD::ATOMIC_LOAD_FMIN; - break; - case Intrinsic::amdgcn_ds_fmax: - Opc = AMDGPUISD::ATOMIC_LOAD_FMAX; - break; - default: - llvm_unreachable("Unknown intrinsic!"); - } - SDValue Ops[] = { - M->getOperand(0), // Chain - M->getOperand(2), // Ptr - M->getOperand(3) // Value - }; - - return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops, - M->getMemoryVT(), M->getMemOperand()); + unsigned Opc = IntrID == Intrinsic::amdgcn_ds_fmin ? ISD::ATOMIC_LOAD_FMIN + : ISD::ATOMIC_LOAD_FMAX; + return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(), M->getOperand(0), + M->getOperand(2), M->getOperand(3), + M->getMemOperand()); } case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_ptr_buffer_load: @@ -9130,22 +9118,21 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmin_num: { - Opcode = AMDGPUISD::ATOMIC_LOAD_FMIN; + Opcode = ISD::ATOMIC_LOAD_FMIN; break; } case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmax_num: case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmax_num: { - Opcode = AMDGPUISD::ATOMIC_LOAD_FMAX; + Opcode = ISD::ATOMIC_LOAD_FMAX; break; } default: llvm_unreachable("unhandled atomic opcode"); } - return DAG.getMemIntrinsicNode(Opcode, SDLoc(Op), - M->getVTList(), Ops, M->getMemoryVT(), - M->getMemOperand()); + return DAG.getAtomic(Opcode, SDLoc(Op), M->getMemoryVT(), M->getVTList(), + Ops, M->getMemOperand()); } case Intrinsic::amdgcn_s_get_barrier_state: { SDValue Chain = Op->getOperand(0); @@ -15816,8 +15803,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N, case ISD::INTRINSIC_W_CHAIN: return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(1)); case AMDGPUISD::ATOMIC_CMP_SWAP: - case AMDGPUISD::ATOMIC_LOAD_FMIN: - case AMDGPUISD::ATOMIC_LOAD_FMAX: case AMDGPUISD::BUFFER_ATOMIC_SWAP: case AMDGPUISD::BUFFER_ATOMIC_ADD: case AMDGPUISD::BUFFER_ATOMIC_SUB: @@ -16077,17 +16062,21 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AtomicExpansionKind::CmpXChg; } case AtomicRMWInst::FMin: - case AtomicRMWInst::FMax: + case AtomicRMWInst::FMax: { + Type *Ty = RMW->getType(); + + // LDS float and double fmin/fmax were always supported. + if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy())) + return AtomicExpansionKind::None; + + return AtomicExpansionKind::CmpXChg; + } case AtomicRMWInst::Min: case AtomicRMWInst::Max: case AtomicRMWInst::UMin: case AtomicRMWInst::UMax: { if (AMDGPU::isFlatGlobalAddrSpace(AS) || AS == AMDGPUAS::BUFFER_FAT_POINTER) { - if (RMW->getType()->isFloatTy() && - unsafeFPAtomicsDisabled(RMW->getFunction())) - return AtomicExpansionKind::CmpXChg; - // Always expand system scope min/max atomics. if (HasSystemScope) return AtomicExpansionKind::CmpXChg; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 9b9ff4a..80c6235 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -72,14 +72,6 @@ def SDTAtomic2_f32 : SDTypeProfile<1, 2, [ SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1> ]>; -def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32, - [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] ->; - -def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32, - [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] ->; - // load_d16_{lo|hi} ptr, tied_input def SIload_d16 : SDTypeProfile<1, 2, [ SDTCisPtrTy<1>, @@ -314,13 +306,6 @@ class isIntType<ValueType SrcVT> { } //===----------------------------------------------------------------------===// -// PatFrags for global memory operations -//===----------------------------------------------------------------------===// - -defm atomic_load_fmin : binary_atomic_op_fp_all_as<SIatomic_fmin>; -defm atomic_load_fmax : binary_atomic_op_fp_all_as<SIatomic_fmax>; - -//===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. // This is for SDNodes and PatFrag for local loads and stores to // enable s_mov_b32 m0, -1 to be glued to the memory instructions. @@ -742,8 +727,8 @@ defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; -defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; -defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; +defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 0, SDTAtomic2_f32, 0>; +defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 0, SDTAtomic2_f32, 0>; def as_i1timm : SDNodeXForm<timm, [{ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index e32bb8f..531b23d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3863,11 +3863,6 @@ def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction { let mayStore = 1; } -let Namespace = "AMDGPU" in { -def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP; -def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP; -} - class BufferAtomicGenericInstruction : AMDGPUGenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index ef635fd..e6a439e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -590,9 +590,7 @@ bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { } bool isGenericAtomic(unsigned Opc) { - return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN || - Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX || - Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || + return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD || Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB || Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN || |