diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2022-11-02 23:42:58 -0700 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2023-01-27 22:17:16 -0400 |
commit | 93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c (patch) | |
tree | 5b3080c5987ff9050d48e68645cfbabbd61b55b8 | |
parent | 7d31d3b09844897821db029f96682853160863d0 (diff) | |
download | llvm-93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c.zip llvm-93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c.tar.gz llvm-93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c.tar.bz2 |
AMDGPU: Support atomicrmw uinc_wrap/udec_wrap
For now keep the existing intrinsics working.
23 files changed, 925 insertions, 53 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 7e7dbac..9ef59a4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -227,10 +227,8 @@ def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT, SItbuffer_store>; def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>; // FIXME: Check MMO is atomic -def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>; +def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap_glue>; +def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap_glue>; def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, SIatomic_fmin>; def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, SIatomic_fmax>; def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, atomic_load_fmin_glue>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 42d1f58..ca2d7ff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -503,10 +503,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { // isa<MemSDNode> almost works but is slightly too permissive for some DS // intrinsics. 
if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) || - (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || - Opc == ISD::ATOMIC_LOAD_FADD || - Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || - Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) { + Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || + Opc == AMDGPUISD::ATOMIC_LOAD_FMAX) { N = glueCopyToM0LDSInit(N); SelectCode(N); return; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 8121b38..f4d49fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4508,8 +4508,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) NODE_NAME_CASE(DS_ORDERED_COUNT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) - NODE_NAME_CASE(ATOMIC_INC) - NODE_NAME_CASE(ATOMIC_DEC) NODE_NAME_CASE(ATOMIC_LOAD_FMIN) NODE_NAME_CASE(ATOMIC_LOAD_FMAX) NODE_NAME_CASE(BUFFER_LOAD) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index bc3b57a..e5efa13 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -505,8 +505,6 @@ enum NodeType : unsigned { TBUFFER_LOAD_FORMAT_D16, DS_ORDERED_COUNT, ATOMIC_CMP_SWAP, - ATOMIC_INC, - ATOMIC_DEC, ATOMIC_LOAD_FMIN, ATOMIC_LOAD_FMAX, BUFFER_LOAD, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index b0c5df5..5f630ea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3446,9 +3446,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_ATOMICRMW_MAX: case TargetOpcode::G_ATOMICRMW_UMIN: case TargetOpcode::G_ATOMICRMW_UMAX: + case TargetOpcode::G_ATOMICRMW_UINC_WRAP: + case TargetOpcode::G_ATOMICRMW_UDEC_WRAP: case TargetOpcode::G_ATOMICRMW_FADD: - case 
AMDGPU::G_AMDGPU_ATOMIC_INC: - case AMDGPU::G_AMDGPU_ATOMIC_DEC: case AMDGPU::G_AMDGPU_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_ATOMIC_FMAX: return selectG_LOAD_STORE_ATOMICRMW(I); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 22b3272..64977b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -638,6 +638,8 @@ defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>; defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>; defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>; defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>; +defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>; +defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>; let MemoryVT = v2f16 in defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>; defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 41cb0a9..992200b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1335,7 +1335,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, - G_ATOMICRMW_UMIN}) + G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP}) .legalFor({{S32, GlobalPtr}, {S32, LocalPtr}, {S64, GlobalPtr}, {S64, LocalPtr}, {S32, RegionPtr}, {S64, RegionPtr}}); @@ -4627,8 +4627,8 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B, bool IsInc) const { - unsigned Opc = IsInc ? 
AMDGPU::G_AMDGPU_ATOMIC_INC : - AMDGPU::G_AMDGPU_ATOMIC_DEC; + unsigned Opc = IsInc ? AMDGPU::G_ATOMICRMW_UINC_WRAP : + AMDGPU::G_ATOMICRMW_UDEC_WRAP; B.buildInstr(Opc) .addDef(MI.getOperand(0).getReg()) .addUse(MI.getOperand(2).getReg()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5e16a40..d462ee5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4828,9 +4828,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_ATOMICRMW_UMAX: case AMDGPU::G_ATOMICRMW_UMIN: case AMDGPU::G_ATOMICRMW_FADD: + case AMDGPU::G_ATOMICRMW_UINC_WRAP: + case AMDGPU::G_ATOMICRMW_UDEC_WRAP: case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: - case AMDGPU::G_AMDGPU_ATOMIC_INC: - case AMDGPU::G_AMDGPU_ATOMIC_DEC: case AMDGPU::G_AMDGPU_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_ATOMIC_FMAX: { OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index ebd32c6..b55ebaa 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1465,8 +1465,8 @@ defm : BufferAtomicPat<"atomic_load_umax_global", Ty, "BUFFER_ATOMIC_UMAX" # Suf defm : BufferAtomicPat<"atomic_load_and_global", Ty, "BUFFER_ATOMIC_AND" # Suffix>; defm : BufferAtomicPat<"atomic_load_or_global", Ty, "BUFFER_ATOMIC_OR" # Suffix>; defm : BufferAtomicPat<"atomic_load_xor_global", Ty, "BUFFER_ATOMIC_XOR" # Suffix>; -defm : BufferAtomicPat<"atomic_inc_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>; -defm : BufferAtomicPat<"atomic_dec_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>; +defm : BufferAtomicPat<"atomic_load_uinc_wrap_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>; +defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>; } // end foreach Ty diff --git 
a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 26f3537..326266c 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1069,8 +1069,8 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">; defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">; defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">; -defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_inc">; -defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_dec">; +defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">; +defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">; defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">; defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">; defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">; @@ -1097,8 +1097,8 @@ defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">; defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">; defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">; -defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_inc">; -defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_dec">; +defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">; +defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">; defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">; defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">; defm : 
DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 3168aaa..dff3091 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1160,8 +1160,8 @@ def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; foreach as = [ "flat", "global" ] in { defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; @@ -1174,8 +1174,8 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; @@ -1429,8 +1429,8 @@ defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", 
"atomic_load_add_global", i32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; @@ -1444,8 +1444,8 @@ defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_inc_global", i64>; -defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>; +defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index fad3932..c7e3755 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -2182,3 +2182,18 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, return Node; } + +TargetLowering::AtomicExpansionKind 
+R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + switch (RMW->getOperation()) { + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: + // FIXME: Cayman at least appears to have instructions for this, but the + // instruction defintions appear to be missing. + return AtomicExpansionKind::CmpXChg; + default: + break; + } + + return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); +} diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h index 8a5479d..fc361c01 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -114,6 +114,9 @@ private: SelectionDAG &DAG) const; SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; + + TargetLowering::AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override; }; } // End namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e0ad11d..056daee 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -791,6 +791,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX, ISD::ATOMIC_LOAD_FADD, + ISD::ATOMIC_LOAD_UINC_WRAP, + ISD::ATOMIC_LOAD_UDEC_WRAP, ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN}); @@ -7317,6 +7319,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, M->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); } + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: case Intrinsic::amdgcn_ds_fadd: { MemSDNode *M = cast<MemSDNode>(Op); unsigned Opc; @@ -7324,24 +7328,28 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_ds_fadd: Opc = ISD::ATOMIC_LOAD_FADD; break; + case Intrinsic::amdgcn_atomic_inc: + Opc = ISD::ATOMIC_LOAD_UINC_WRAP; + break; + case Intrinsic::amdgcn_atomic_dec: + Opc = 
ISD::ATOMIC_LOAD_UDEC_WRAP; + break; } return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(), M->getOperand(0), M->getOperand(2), M->getOperand(3), M->getMemOperand()); } - case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { MemSDNode *M = cast<MemSDNode>(Op); unsigned Opc; switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: - Opc = AMDGPUISD::ATOMIC_INC; + Opc = ISD::ATOMIC_LOAD_UINC_WRAP; break; case Intrinsic::amdgcn_atomic_dec: - Opc = AMDGPUISD::ATOMIC_DEC; + Opc = ISD::ATOMIC_LOAD_UDEC_WRAP; break; case Intrinsic::amdgcn_ds_fmin: Opc = AMDGPUISD::ATOMIC_LOAD_FMIN; @@ -12794,8 +12802,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence( return AMDGPU::isIntrinsicSourceOfDivergence( cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()); case AMDGPUISD::ATOMIC_CMP_SWAP: - case AMDGPUISD::ATOMIC_INC: - case AMDGPUISD::ATOMIC_DEC: case AMDGPUISD::ATOMIC_LOAD_FMIN: case AMDGPUISD::ATOMIC_LOAD_FMAX: case AMDGPUISD::BUFFER_ATOMIC_SWAP: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 0fb39c5..41a7ced 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -50,14 +50,6 @@ def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT", [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue] >; -def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2, - [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] ->; - -def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2, - [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] ->; - def SDTAtomic2_f32 : SDTypeProfile<1, 2, [ SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1> ]>; @@ -355,8 +347,6 @@ class isPackedType<ValueType SrcVT> { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>; 
-defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>; defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>; defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>; @@ -762,8 +752,8 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0, defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">; -defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>; -defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>; +defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">; +defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">; defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">; defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">; defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 0c2a138..19f2f27 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3486,8 +3486,6 @@ def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction { } let Namespace = "AMDGPU" in { -def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP; -def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP; def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP; def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP; } diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll index a15854c..c5d8414 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll @@ -7106,3 +7106,209 @@ entry: store atomic half %in, ptr %out seq_cst, align 2 ret void } + +; GCN-LABEL: {{^}}atomic_inc_i32_offset: +; CIVI: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} +define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 4 + %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: 
{{^}}atomic_inc_i32_max_offset: +; CIVI: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}} +define amdgpu_kernel void @atomic_inc_i32_max_offset(ptr %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 1023 + %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_max_offset_p1: +; GCN: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +define amdgpu_kernel void @atomic_inc_i32_max_offset_p1(ptr %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 1024 + %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_ret_offset: +; CIVI: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr %out, ptr %out2, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 4 + %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_incr64_offset: +; CIVI: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} +define amdgpu_kernel void @atomic_inc_i32_incr64_offset(ptr %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %gep = getelementptr i32, ptr %ptr, i32 4 + %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_ret_incr64_offset: +; CIVI: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 
+define amdgpu_kernel void @atomic_inc_i32_ret_incr64_offset(ptr %out, ptr %out2, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %gep = getelementptr i32, ptr %ptr, i32 4 + %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32: +; GCN: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +define amdgpu_kernel void @atomic_inc_i32(ptr %out, i32 %in) { +entry: + %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_ret: +; GCN: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i32_ret(ptr %out, ptr %out2, i32 %in) { +entry: + %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_incr64: +; GCN: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +define amdgpu_kernel void @atomic_inc_i32_incr64(ptr %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_ret_incr64: +; GCN: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i32_ret_incr64(ptr %out, ptr %out2, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_offset: +; CIVI: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} +define amdgpu_kernel void @atomic_dec_i32_offset(ptr %out, i32 %in) { +entry: + %gep = getelementptr 
i32, ptr %out, i32 4 + %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_max_offset: +; CIVI: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}} +define amdgpu_kernel void @atomic_dec_i32_max_offset(ptr %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 1023 + %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_max_offset_p1: +; GCN: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +define amdgpu_kernel void @atomic_dec_i32_max_offset_p1(ptr %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 1024 + %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_ret_offset: +; CIVI: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr %out, ptr %out2, i32 %in) { +entry: + %gep = getelementptr i32, ptr %out, i32 4 + %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_decr64_offset: +; CIVI: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} +define amdgpu_kernel void @atomic_dec_i32_decr64_offset(ptr %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %gep = getelementptr i32, ptr %ptr, i32 4 + %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_ret_decr64_offset: +; CIVI: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_dec 
[[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i32_ret_decr64_offset(ptr %out, ptr %out2, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %gep = getelementptr i32, ptr %ptr, i32 4 + %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32: +; GCN: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +define amdgpu_kernel void @atomic_dec_i32(ptr %out, i32 %in) { +entry: + %val = atomicrmw volatile udec_wrap ptr %out, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_ret: +; GCN: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i32_ret(ptr %out, ptr %out2, i32 %in) { +entry: + %val = atomicrmw volatile udec_wrap ptr %out, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_decr64: +; GCN: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +define amdgpu_kernel void @atomic_dec_i32_decr64(ptr %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_ret_decr64: +; GCN: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i32_ret_decr64(ptr %out, ptr %out2, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr %out, i64 %index + %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in seq_cst + store i32 %val, ptr %out2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll index 027f3c3..e5dc413 100644 --- 
a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -4585,3 +4585,163 @@ entry: store atomic double %in, ptr %ptr seq_cst, align 8 ret void } + +; GCN-LABEL: {{^}}atomic_inc_i64_offset: +; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) { +entry: + %gep = getelementptr i64, ptr %out, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_ret_offset: +; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %in) { +entry: + %gep = getelementptr i64, ptr %out, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_incr64_offset: +; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %gep = getelementptr i64, ptr %ptr, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_ret_incr64_offset: +; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %gep = getelementptr i64, ptr %ptr, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64: +; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], 
v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) { +entry: + %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_ret: +; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) { +entry: + %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_incr64: +; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_ret_incr64: +; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_offset: +; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) { +entry: + %gep = getelementptr i64, ptr %out, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_ret_offset: +; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, 
ptr %out2, i64 %in) { +entry: + %gep = getelementptr i64, ptr %out, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_decr64_offset: +; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %gep = getelementptr i64, ptr %ptr, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_ret_decr64_offset: +; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %gep = getelementptr i64, ptr %ptr, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64: +; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) { +entry: + %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_ret: +; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) { +entry: + %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_decr64: +; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index) { +entry: + %ptr = 
getelementptr i64, ptr %out, i64 %index + %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_ret_decr64: +; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr %out, i64 %index + %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in seq_cst + store i64 %tmp0, ptr %out2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 7c6058b..1d5cd69 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -6598,3 +6598,185 @@ entry: store atomic half %in, ptr addrspace(1) %out seq_cst, align 2 ret void } + +; GCN-LABEL: {{^}}atomic_inc_i32_offset: +; SIVI: buffer_atomic_inc v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} +define amdgpu_kernel void @atomic_inc_i32_offset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_max_neg_offset: +; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}} +define amdgpu_kernel void @atomic_inc_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024 + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_soffset: +; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 +; SIVI: buffer_atomic_inc v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} + +; GFX9: v_mov_b32_e32 
[[OFFSET:v[0-9]+]], 0x8000{{$}} +; GFX9: global_atomic_inc [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]:[0-9]+\]}} offset:3232{{$}} +define amdgpu_kernel void @atomic_inc_i32_soffset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000 + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_huge_offset: +; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac +; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd +; SI: buffer_atomic_inc v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} + +; VI: flat_atomic_inc + +; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac +; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd +; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}} +define amdgpu_kernel void @atomic_inc_i32_huge_offset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595 + + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_ret_offset: +; SIVI: buffer_atomic_inc [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}} +define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + store i32 %val, ptr addrspace(1) %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_addr64_offset: +; SI: buffer_atomic_inc v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, 
s[{{[0-9]+:[0-9]+}}] offset:16{{$}} +define amdgpu_kernel void @atomic_inc_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i32_ret_addr64_offset: +; SI: buffer_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} +; VI: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_inc [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} +; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s +define amdgpu_kernel void @atomic_inc_i32_ret_addr64_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst + store i32 %val, ptr addrspace(1) %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_offset: +; SIVI: buffer_atomic_dec v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} +define amdgpu_kernel void @atomic_dec_i32_offset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_max_neg_offset: +; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}} +define amdgpu_kernel void @atomic_dec_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024 + %val = atomicrmw 
volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_soffset: +; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 +; SIVI: buffer_atomic_dec v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} + +; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}} +; GFX9: global_atomic_dec [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]:[0-9]+\]}} offset:3232{{$}} +define amdgpu_kernel void @atomic_dec_i32_soffset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000 + %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_huge_offset: +; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac +; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd +; SI: buffer_atomic_dec v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} + +; VI: flat_atomic_dec + +; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac +; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd +; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}} +define amdgpu_kernel void @atomic_dec_i32_huge_offset(ptr addrspace(1) %out, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595 + + %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_ret_offset: +; SIVI: buffer_atomic_dec [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}} +define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in) { +entry: + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + store i32 %val, ptr addrspace(1) %out2 + ret void +} + +; GCN-LABEL: 
{{^}}atomic_dec_i32_addr64_offset: +; SI: buffer_atomic_dec v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} +define amdgpu_kernel void @atomic_dec_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i32_ret_addr64_offset: +; SI: buffer_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} +; VI: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_dec [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} +; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s +define amdgpu_kernel void @atomic_dec_i32_ret_addr64_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst + store i32 %val, ptr addrspace(1) %out2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll index d295efc..3182a1e 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -5969,3 +5969,75 @@ entry: store atomic double %in, ptr addrspace(1) %gep seq_cst, align 8 ret void } + +; GCN-LABEL: {{^}}atomic_inc_i64_offset: +; CIVI: buffer_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_inc_x2 
v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}} +define amdgpu_kernel void @atomic_inc_i64_offset(ptr addrspace(1) %out, i64 %in) { +entry: + %gep = getelementptr i64, ptr addrspace(1) %out, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_ret_offset: +; CIVI: buffer_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}} +define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i64 %in) { +entry: + %gep = getelementptr i64, ptr addrspace(1) %out, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst + store i64 %tmp0, ptr addrspace(1) %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_inc_i64_incr64_offset: +; CI: buffer_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} +; VI: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +; GFX9: global_atomic_inc_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} +define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr addrspace(1) %out, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4 + %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_offset: +; CIVI: buffer_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_dec_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}} +define amdgpu_kernel void @atomic_dec_i64_offset(ptr addrspace(1) %out, i64 %in) { +entry: + %gep = 
getelementptr i64, ptr addrspace(1) %out, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_ret_offset: +; CIVI: buffer_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}} +define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i64 %in) { +entry: + %gep = getelementptr i64, ptr addrspace(1) %out, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst + store i64 %tmp0, ptr addrspace(1) %out2 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_i64_decr64_offset: +; CI: buffer_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} +; VI: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +; GFX9: global_atomic_dec_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} +define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr addrspace(1) %out, i64 %in, i64 %index) { +entry: + %ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4 + %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics.ll b/llvm/test/CodeGen/AMDGPU/local-atomics.ll index 140c615..22b5220 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomics.ll @@ -728,3 +728,103 @@ define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(ptr addrspace(3) %pt %result = atomicrmw umax ptr addrspace(3) %gep, i32 4 seq_cst ret void } + +; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; EG: LDS_CMPST +; GCN: ds_inc_rtn_u32 +; 
GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 4 seq_cst + store i32 %result, ptr addrspace(1) %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; EG: LDS_CMPST +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 4 seq_cst + store i32 %result, ptr addrspace(1) %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_inc_u32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 4 seq_cst + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 4 seq_cst + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; EG: LDS_CMPST +; GCN: ds_dec_rtn_u32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 4 seq_cst + store i32 %result, ptr addrspace(1) %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; EG: LDS_CMPST +; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, 
v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 4 seq_cst + store i32 %result, ptr addrspace(1) %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_dec_u32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 4 seq_cst + ret void +} + +; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_dec_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 4 seq_cst + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll index aa93adf..8c43c77 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll @@ -639,3 +639,121 @@ define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(ptr addrspace(3) %pt %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst ret void } + +; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_inc_rtn_u64 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 4 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset: +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 
0xb +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32 +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4 + %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 9 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64: +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} +; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]] +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 1 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_inc_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_inc1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 1 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_dec_rtn_u64 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %result = 
atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 4 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset: +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32 +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4 + %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 9 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64: +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} +; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]] +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 1 seq_cst + store i64 %result, ptr addrspace(1) %out, align 8 + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_dec_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_dec1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind { + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 1 seq_cst + store i64 %result, ptr 
addrspace(1) %out, align 8 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll index 3c9a05a..b89d9ea 100644 --- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -280,6 +280,34 @@ define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr ret void } +; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0: +; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} +; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8 +; GCN: s_endpgm +define amdgpu_kernel void @atomic_inc_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 + %idx.0 = add nsw i32 %tid.x, 2 + %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0 + %val = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i32 31 seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 + store i32 %idx.0, ptr addrspace(1) %add_use, align 4 + ret void +} + +; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0: +; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} +; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8 +; GCN: s_endpgm +define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 + %idx.0 = add nsw i32 %tid.x, 2 + %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0 + %val = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 31 seq_cst + store i32 %val, ptr addrspace(1) %out, align 4 + store i32 %idx.0, ptr addrspace(1) %add_use, align 4 + ret void +} + ; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_lds: ; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0 ; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0 |