diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 240 |
1 files changed, 120 insertions, 120 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll index 873fcee..6067194 100644 --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -71,12 +71,12 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 4 offen glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: @@ -86,12 +86,12 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) @@ -154,12 +154,12 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 4 offen glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: @@ -169,12 +169,12 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) @@ -237,12 +237,12 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: @@ -252,12 +252,12 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) @@ -320,12 +320,12 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: @@ -335,12 +335,12 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) @@ -403,12 +403,12 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 4 offen glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: @@ -418,12 +418,12 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) @@ -486,12 +486,12 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 4 offen glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: @@ -501,12 +501,12 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) @@ -569,12 +569,12 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: @@ -584,12 +584,12 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) @@ -652,12 +652,12 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: @@ -667,12 +667,12 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) @@ -735,12 +735,12 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 4 offen glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: @@ -750,12 +750,12 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) @@ -818,12 +818,12 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 4 offen glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: @@ -833,12 +833,12 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) @@ -901,12 +901,12 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: @@ -916,12 +916,12 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) @@ -984,12 +984,12 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 +; GFX90A-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 glc slc +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX90A-NEXT: s_endpgm ; ; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: @@ -999,12 +999,12 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX942-NEXT: v_mov_b32_e32 v2, s10 -; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v0, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[2:3], v0, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) |