# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s

# Tests for SILoadStoreOptimizer's promotion of constant address additions
# into the immediate offset field of FLAT/GLOBAL memory instructions.
# GFX9 supports negative immediate offsets on global/flat instructions, so
# nearby addresses computed with V_ADD_CO_U32/V_ADDC_U32 pairs can be
# rewritten to share one base with per-access immediate offsets; GFX8 does
# not, so the GFX8 checks expect the offsets to remain 0.

# Two loads whose address adds (4096 and 6144) differ by 2048: on GFX9 the
# pass anchors on the higher address and folds -2048 into the first load.
# GCN-LABEL: name: diffoporder_add
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
---
name: diffoporder_add
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 4096
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6144
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---

# Three loads at offsets 8000, 6400 and 11200: the lowest address is in the
# middle of the list. On GFX9 the pass builds a new base at 11200, folds
# -3200 into the 8000 access, and keeps a separate base for the 6400 access.
# GCN-LABEL: name: LowestInMiddle
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
#
# GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
name: LowestInMiddle
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 8000
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6400
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 11200
    %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Three loads at offsets 6144, 8192 and 10240: on GFX9 all three share one
# base anchored at 10240, with -4096 and -2048 folded into the lower two.
# GCN-LABEL: name: NegativeDistance
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
name: NegativeDistance
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 8192
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 10240
    %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Tests for a successful compilation.
# The V_ADDC_U32 uses carry-in constant 4294967295 (all-ones high word), a
# case that must not trip the pass's offset-extraction assertions.
name: assert_hit
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---

# Same base-promotion applied to stores (adds of 4000 and 3000 -> 1000 delta).
# GCN-LABEL: name: diffoporder_add_store
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0
# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
name: diffoporder_add_store
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...
---

# Same promotion for FLAT loads (flat instructions gained offsets on GFX9).
# GCN-LABEL: name: diffoporder_add_flat_load
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
name: diffoporder_add_flat_load
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    %14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    %15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
...
---

# Same promotion for FLAT stores.
# GCN-LABEL: name: diffoporder_add_flat_store
# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
name: diffoporder_add_flat_store
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
...
---

# Same promotion for global atomic compare-and-swap.
# GCN-LABEL: name: diffoporder_add_global_atomic_cmpswap
# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
name: diffoporder_add_global_atomic_cmpswap
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec
...
---

# Same promotion for flat atomic compare-and-swap.
# GCN-LABEL: name: diffoporder_add_flat_atomic_cmpswap
# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
name: diffoporder_add_flat_atomic_cmpswap
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    FLAT_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    FLAT_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
...
---

# Same promotion for global atomic add (no return value).
# GCN-LABEL: name: diffoporder_add_global_atomic_add
# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
name: diffoporder_add_global_atomic_add
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec
...
---

# Same promotion for flat atomic add (no return value).
# GCN-LABEL: name: diffoporder_add_flat_atomic_add
# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
name: diffoporder_add_flat_atomic_add
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    FLAT_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    FLAT_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
...
---

# Same promotion for global atomic add with return value.
# GCN-LABEL: name: diffoporder_add_global_atomic_add_rtn
# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
name: diffoporder_add_global_atomic_add_rtn
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    %14:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    %15:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec
...
---

# Same promotion for flat atomic add with return value.
# GCN-LABEL: name: diffoporder_add_flat_atomic_add_rtn
# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
name: diffoporder_add_flat_atomic_add_rtn
body: |
  bb.0.entry:
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    %14:vgpr_32 = FLAT_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    %15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
...
---

# An all-ones (-1) address with a carry-borrow: the adds must be left alone
# rather than folded into a (wrapping) negative offset.
# GCN-LABEL: name: negative_offset_nullptr
# GCN: V_ADD_CO_U32_e64 -1, 0, 0
# GCN: V_ADDC_U32_e64 -1, %{{[0-9]+}}, %{{[0-9]+}}, 0
name: negative_offset_nullptr
body: |
  bb.0:
    %0:sreg_64 = S_MOV_B64 $src_private_base
    %1:sreg_32 = S_MOV_B32 0
    %2:sreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0.sub1, %subreg.sub1
    %3:vgpr_32, %4:sreg_64_xexec = V_ADD_CO_U32_e64 -1, 0, 0, implicit $exec
    %5:vgpr_32 = COPY %2.sub1
    %6:vgpr_32, %7:sreg_64 = V_ADDC_U32_e64 -1, %5, %4, 0, implicit $exec
    %8:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %6, %subreg.sub1
    %9:vgpr_32 = FLAT_LOAD_UBYTE %8, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0, implicit %9
...