; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12 %s

; Each function below loads an i8 or i16 through %ptra, widens it to i32 with
; sext or zext, adds the widened value to itself, and stores the i32 sum
; through %out.
;
; The autogenerated assertions expect exactly one load instruction per
; function, whose mnemonic suffix follows the extension kind (_i8/_i16 for
; sext, _u8/_u16 for zext), with no separate extension instruction between
; the load and the add.
;
; Functions are grouped by the address space of %ptra and are named
; <ext>load_P<address space>_<loaded type>.

; Address space 0: the assertions expect flat_load_* and flat_store_b32.

define amdgpu_ps void @sextload_P0_i8(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: sextload_P0_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    flat_load_i8 v0, v[0:1]
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    flat_store_b32 v[2:3], v0
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(0) %ptra
  %a32 = sext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(0) %out
  ret void
}

define amdgpu_ps void @sextload_P0_i16(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: sextload_P0_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    flat_load_i16 v0, v[0:1]
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    flat_store_b32 v[2:3], v0
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(0) %ptra
  %a32 = sext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(0) %out
  ret void
}

define amdgpu_ps void @zextload_P0_i8(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: zextload_P0_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    flat_load_u8 v0, v[0:1]
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    flat_store_b32 v[2:3], v0
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(0) %ptra
  %a32 = zext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(0) %out
  ret void
}

define amdgpu_ps void @zextload_P0_i16(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: zextload_P0_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    flat_load_u16 v0, v[0:1]
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    flat_store_b32 v[2:3], v0
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(0) %ptra
  %a32 = zext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(0) %out
  ret void
}

; Address space 1: the assertions expect global_load_* and global_store_b32.

define amdgpu_ps void @sextload_P1_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P1_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_i8 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(1) %ptra
  %a32 = sext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @sextload_P1_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P1_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_i16 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(1) %ptra
  %a32 = sext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @zextload_P1_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P1_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(1) %ptra
  %a32 = zext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @zextload_P1_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P1_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_u16 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(1) %ptra
  %a32 = zext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; Address space 3: the assertions expect ds_load_* and ds_store_b32, with
; both pointers held in a single 32-bit register each.

define amdgpu_ps void @sextload_P3_i8(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: sextload_P3_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    ds_load_i8 v0, v0
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    ds_store_b32 v1, v0
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(3) %ptra
  %a32 = sext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(3) %out
  ret void
}

define amdgpu_ps void @sextload_P3_i16(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: sextload_P3_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    ds_load_i16 v0, v0
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    ds_store_b32 v1, v0
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(3) %ptra
  %a32 = sext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(3) %out
  ret void
}

define amdgpu_ps void @zextload_P3_i8(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: zextload_P3_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    ds_load_u8 v0, v0
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    ds_store_b32 v1, v0
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(3) %ptra
  %a32 = zext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(3) %out
  ret void
}

define amdgpu_ps void @zextload_P3_i16(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: zextload_P3_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    ds_load_u16 v0, v0
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    ds_store_b32 v1, v0
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(3) %ptra
  %a32 = zext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(3) %out
  ret void
}

; Address space 4: the loads are expected as global_load_*. Unlike the other
; groups, %out is an addrspace(1) pointer here, so the store is expected as
; global_store_b32.

define amdgpu_ps void @sextload_P4_i8(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P4_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_i8 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(4) %ptra
  %a32 = sext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @sextload_P4_i16(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P4_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_i16 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(4) %ptra
  %a32 = sext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @zextload_P4_i8(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P4_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(4) %ptra
  %a32 = zext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @zextload_P4_i16(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P4_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    global_load_u16 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(4) %ptra
  %a32 = zext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; Address space 5: the assertions expect scratch_load_* and
; scratch_store_b32, with both pointers held in a single 32-bit register each.

define amdgpu_ps void @sextload_P5_i8(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: sextload_P5_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    scratch_load_i8 v0, v0, off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    scratch_store_b32 v1, v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(5) %ptra
  %a32 = sext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(5) %out
  ret void
}

define amdgpu_ps void @sextload_P5_i16(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: sextload_P5_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    scratch_load_i16 v0, v0, off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    scratch_store_b32 v1, v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(5) %ptra
  %a32 = sext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(5) %out
  ret void
}

define amdgpu_ps void @zextload_P5_i8(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: zextload_P5_i8:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    scratch_load_u8 v0, v0, off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    scratch_store_b32 v1, v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(5) %ptra
  %a32 = zext i8 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(5) %out
  ret void
}

define amdgpu_ps void @zextload_P5_i16(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: zextload_P5_i16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    scratch_load_u16 v0, v0, off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT:    scratch_store_b32 v1, v0, off
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(5) %ptra
  %a32 = zext i16 %a to i32
  %res = add i32 %a32, %a32
  store i32 %res, ptr addrspace(5) %out
  ret void
}