diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
7 files changed, 655 insertions, 58 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll index 221e2fd..09e1fca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll @@ -1200,7 +1200,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX6-NEXT: s_mov_b32 s5, s7 ; GFX6-NEXT: s_mov_b32 s6, s8 ; GFX6-NEXT: s_mov_b32 s7, s9 -; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm ; GFX6-NEXT: s_endpgm ; ; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return: @@ -1213,7 +1213,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX8-NEXT: s_mov_b32 s5, s7 ; GFX8-NEXT: s_mov_b32 s6, s8 ; GFX8-NEXT: s_mov_b32 s7, s9 -; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm ; GFX8-NEXT: s_endpgm ; ; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return: @@ -1226,7 +1226,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX900-NEXT: s_mov_b32 s5, s7 ; GFX900-NEXT: s_mov_b32 s6, s8 ; GFX900-NEXT: s_mov_b32 s7, s9 -; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return: @@ -1239,7 +1239,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX90A-NEXT: s_mov_b32 s5, s7 ; GFX90A-NEXT: s_mov_b32 s6, s8 ; GFX90A-NEXT: s_mov_b32 s7, s9 -; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm ; GFX90A-NEXT: s_endpgm ; ; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return: @@ -1252,7 +1252,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 -; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm ; GFX10PLUS-NEXT: s_endpgm ; ; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return: @@ -1265,7 +1265,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX12-NEXT: s_mov_b32 s5, s7 ; GFX12-NEXT: s_mov_b32 s6, s8 ; GFX12-NEXT: s_mov_b32 s7, s9 -; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN +; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; GFX12-NEXT: s_endpgm main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -3194,7 +3194,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX6-NEXT: s_mov_b32 s5, s7 ; GFX6-NEXT: s_mov_b32 s6, s8 ; GFX6-NEXT: s_mov_b32 s7, s9 -; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm ; GFX6-NEXT: s_endpgm ; ; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return: @@ -3207,7 +3207,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX8-NEXT: s_mov_b32 s5, s7 ; GFX8-NEXT: s_mov_b32 s6, s8 ; GFX8-NEXT: s_mov_b32 s7, s9 -; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm ; GFX8-NEXT: s_endpgm ; ; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return: @@ -3220,7 +3220,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX900-NEXT: s_mov_b32 s5, s7 ; GFX900-NEXT: s_mov_b32 s6, s8 ; GFX900-NEXT: s_mov_b32 s7, s9 -; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return: @@ -3233,7 +3233,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX90A-NEXT: s_mov_b32 s5, s7 ; GFX90A-NEXT: s_mov_b32 s6, s8 ; GFX90A-NEXT: s_mov_b32 s7, s9 -; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm ; GFX90A-NEXT: s_endpgm ; ; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return: @@ -3246,7 +3246,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 -; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc +; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; GFX10PLUS-NEXT: s_endpgm ; ; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return: @@ -3259,7 +3259,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX12-NEXT: s_mov_b32 s5, s7 ; GFX12-NEXT: s_mov_b32 s6, s8 ; GFX12-NEXT: s_mov_b32 s7, s9 -; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN +; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX12-NEXT: s_endpgm main_body: %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir index 292fa4b..4f160b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -25,6 +25,7 @@ body: | ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX8-LABEL: name: atomic_cmpswap_i32_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} @@ -35,6 +36,7 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX10-LABEL: name: atomic_cmpswap_i32_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -45,6 +47,7 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX11-LABEL: name: atomic_cmpswap_i32_1d ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -55,6 +58,7 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0 ; GFX11-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; GFX12-LABEL: name: atomic_cmpswap_i32_1d ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} @@ -89,39 +93,43 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX6-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX6-NEXT: S_ENDPGM 0 + ; ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX8-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX10-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX11-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX11-NEXT: S_ENDPGM 0 + ; ; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX12-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -150,6 +158,7 @@ body: | ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX8-LABEL: name: atomic_cmpswap_i64_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} @@ -160,6 +169,7 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX10-LABEL: name: atomic_cmpswap_i64_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} @@ -170,6 +180,7 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX11-LABEL: name: atomic_cmpswap_i64_1d ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} @@ -180,6 +191,7 @@ body: | ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_]].sub0_sub1 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; ; GFX12-LABEL: name: atomic_cmpswap_i64_1d ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} @@ -214,39 +226,43 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX6-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX6-NEXT: S_ENDPGM 0 + ; ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX8-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX10-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX11-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX11-NEXT: S_ENDPGM 0 + ; ; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX12-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll index 6c4f504..33ce278 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll @@ -23,7 +23,9 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -75,7 +77,9 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(ptr addrs ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 0, [[BALLOT]] ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -126,6 +130,8 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero(ptr addrspace(1) ; PASS-CHECK-NEXT: br label %[[WHILE:.*]] ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]]) +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 0, [[BALLOT]] ; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -175,6 +181,8 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(ptr addrspac ; PASS-CHECK-NEXT: br label %[[WHILE:.*]] ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]]) +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -225,7 +233,9 @@ define protected amdgpu_kernel void @trivial_uniform_waterfall(ptr addrspace(1) ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 0, 0 @@ -292,7 +302,9 @@ define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i3 ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[MYMASK]], [[MYMASK]] @@ -359,7 +371,9 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(ptr addrspace ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -410,6 +424,8 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(ptr addrspace ; PASS-CHECK-NEXT: br label %[[WHILE:.*]] ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[DONE]]) +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i32 0, [[BALLOT]] ; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll index aa11574..a3e42e5 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll @@ -595,6 +595,8 @@ define amdgpu_kernel void @ballot_i32(i32 %v, ptr addrspace(1) %out) { ; PASS-CHECK-LABEL: define amdgpu_kernel void @ballot_i32( ; PASS-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { ; PASS-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[C]]) +; PASS-CHECK-NEXT: [[BALLOT_NE_ZERO:%.*]] = icmp ne i32 [[BALLOT]], 0 ; PASS-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1 ; PASS-CHECK-NEXT: ret void ; @@ -623,6 +625,8 @@ define amdgpu_kernel void @ballot_i64(i32 %v, ptr addrspace(1) %out) { ; PASS-CHECK-LABEL: define amdgpu_kernel void @ballot_i64( ; PASS-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { ; PASS-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[C]]) +; PASS-CHECK-NEXT: [[BALLOT_NE_ZERO:%.*]] = icmp ne i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1 ; PASS-CHECK-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll index 49607e3..83f0229 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll @@ -92,8 +92,7 @@ define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def a0 ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v1, a0 -; GFX90A-NEXT: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm ; GFX90A-NEXT: s_endpgm %data = call i32 asm "; def $0", "=a"() %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -106,8 +105,7 @@ define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def a0 ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 -; GFX90A-NEXT: image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm ; GFX90A-NEXT: s_endpgm %data = call i32 asm "; def $0", "=a"() %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -123,9 +121,7 @@ define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 % ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def a1 ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 -; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 -; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: image_atomic_cmpswap a[0:1], v0, s[0:7] dmask:0x3 unorm ; GFX90A-NEXT: s_endpgm %cmp = call i32 asm "; def $0", "=a"() %swap = call i32 asm "; def $0", "=a"() @@ -139,9 +135,7 @@ define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def a[0:1] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 -; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 -; GFX90A-NEXT: image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm ; GFX90A-NEXT: s_endpgm %data = call i64 asm "; def $0", "=a"() %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -154,14 +148,10 @@ define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i3 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def a[0:1] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 -; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ; def a[2:3] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1 -; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0 -; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: image_atomic_cmpswap a[0:3], v0, s[0:7] dmask:0xf unorm ; GFX90A-NEXT: s_endpgm %cmp = call i64 asm "; def $0", "=a"() %swap = call i64 asm "; def $0", "=a"() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll new file mode 100644 index 0000000..6c58a1a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll @@ -0,0 +1,581 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS-GISE %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISE %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s + +define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_swap_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_swap_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_swap_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_swap_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_swap_1d_i64(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_swap_1d_i64: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_swap_1d_i64: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_swap_1d_i64: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_swap_1d_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_swap_1d_float: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_swap_1d_float: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_swap_1d_float: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_swap_1d_float: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_add_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_sub_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_sub_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_sub_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_sub_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_smin_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_smin_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_smin_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_smin_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_umin_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_umin_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_umin_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_umin_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_smax_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_smax_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_smax_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_smax_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_umax_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_umax_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_umax_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_umax_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_and_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_and_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_and_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_and_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_or_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_or_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_or_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_or_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_xor_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_xor_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_xor_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_xor_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_inc_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_inc_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_inc_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_inc_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_dec_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_dec_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_dec_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_dec_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_cmpswap_1d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_cmpswap_1d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_cmpswap_1d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_cmpswap_1d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_cmpswap_1d_64(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_cmpswap_1d_64: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_cmpswap_1d_64: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_cmpswap_1d_64: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_cmpswap_1d_64: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm + %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) { +; GFX10PLUS-GISE-LABEL: atomic_add_2d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_2d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_2d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_2d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) { +; GFX10PLUS-GISE-LABEL: atomic_add_3d: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_3d: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_3d: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_3d: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) { +; GFX10PLUS-GISE-LABEL: atomic_add_cube: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_cube: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_cube: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_cube: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) { +; GFX10PLUS-GISE-LABEL: atomic_add_1darray: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_1darray: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_1darray: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_1darray: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) { +; GFX10PLUS-GISE-LABEL: atomic_add_2darray: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_2darray: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_2darray: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_2darray: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) { +; GFX10PLUS-GISE-LABEL: atomic_add_2dmsaa: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_2dmsaa: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_2dmsaa: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_2dmsaa: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +; GFX10PLUS-GISE-LABEL: atomic_add_2darraymsaa: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_2darraymsaa: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_2darraymsaa: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_2darraymsaa: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_add_1d_slc: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm slc +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_1d_slc: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm slc +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_1d_slc: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_1d_slc: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll index 3d1d6c8..0ba62e4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll @@ -41,15 +41,13 @@ main_body: define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2 x half> %data, i32 %s) { ; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v2_noret: ; GFX12-SDAG: ; %bb.0: ; %main_body -; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret: ; GFX12-GISEL: ; %bb.0: ; %main_body -; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-GISEL-NEXT: ; return to shader part epilog main_body: @@ -79,15 +77,13 @@ main_body: define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4 x half> %data, i32 %s) { ; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v4_noret: ; GFX12-SDAG: ; %bb.0: ; %main_body -; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret: ; GFX12-GISEL: ; %bb.0: ; %main_body -; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-GISEL-NEXT: ; return to shader part epilog main_body: @@ -126,15 +122,13 @@ main_body: define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2 x bfloat> %data, i32 %s) { ; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v2_noret: ; GFX12-SDAG: ; %bb.0: ; %main_body -; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret: ; GFX12-GISEL: ; %bb.0: ; %main_body -; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-GISEL-NEXT: ; return to shader part epilog main_body: @@ -173,15 +167,13 @@ main_body: define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) { ; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_noret: ; GFX12-SDAG: ; %bb.0: ; %main_body -; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret: ; GFX12-GISEL: ; %bb.0: ; %main_body -; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-GISEL-NEXT: ; return to shader part epilog main_body: @@ -192,15 +184,13 @@ main_body: define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) { ; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt: ; GFX12-SDAG: ; %bb.0: ; %main_body -; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt: ; GFX12-GISEL: ; %bb.0: ; %main_body -; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-GISEL-NEXT: ; return to shader part epilog main_body: |
