; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s

; Code generation tests for the no-return ("nortn") variants of the
; llvm.amdgcn.image.sample.* intrinsics. These intrinsics are declared as
; returning void, and the expected machine instructions carry "off" in the
; destination operand position instead of a VGPR range.

; --- Plain sample, one test per image dimension ---

define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX10PLUS-LABEL: sample_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; GFX10PLUS-LABEL: sample_3d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_3d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX10PLUS-LABEL: sample_cube_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_cube_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; GFX10PLUS-LABEL: sample_1darray_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_1darray_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; GFX10PLUS-LABEL: sample_2darray_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_2darray_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; --- sample.b: one extra leading address operand (named %bias here) ---

define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
; GFX10PLUS-LABEL: sample_b_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_b_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; GFX10PLUS-LABEL: sample_b_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_b_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; --- sample.c: one extra leading address operand (%zcompare) ---

define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GFX10PLUS-LABEL: sample_c_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_c_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX10PLUS-LABEL: sample_c_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_c_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; --- sample.d: explicit derivative operands precede the coordinates. ---
; Unlike the tests above, the expected output contains no s_wqm_b32.

define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_d_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; GFX10PLUS-LABEL: sample_d_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_d_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; --- sample.l: explicit %lod operand follows the coordinates. ---
; As with sample.d, the expected output contains no s_wqm_b32.

define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; GFX10PLUS-LABEL: sample_l_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_l_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GFX10PLUS-LABEL: sample_l_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_l_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; --- Mixed tests: no-return samples interleaved with samples that return ---
; --- <4 x float>, in different orders and with data dependencies.        ---

; No-return sample first, then a returning sample of the same coordinate.
define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_1:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_1:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Returning sample first, then a no-return sample of the same coordinate.
define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_2:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_2:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_mov_b32_e32 v4, v0
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; A no-return sample sits between a returning sample and a second returning
; sample whose coordinate is element 0 of the first result.
define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
; GFX10PLUS-SDAG:       ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT:    image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
; GFX10PLUS-GISEL:       ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT:    image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_3:
; GFX12-SDAG:       ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT:    image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_3:
; GFX12-GISEL:       ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT:    image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v.0 = extractelement <4 x float> %v, i32 0
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %u
}

; Longer chain: several no-return samples whose coordinates come from the
; results of returning samples.
; NOTE(review): %v.0 through %v.3 all extract element 0 of %v. The
; autogenerated assertions were produced from the IR exactly as written, so
; the indices are left untouched; confirm whether 0..3 was intended.
define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
; GFX10PLUS-SDAG:       ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT:    image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(2)
; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
; GFX10PLUS-GISEL:       ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT:    image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(2)
; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_4:
; GFX12-SDAG:       ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT:    image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x2
; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_4:
; GFX12-GISEL:       ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT:    image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x2
; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v.0 = extractelement <4 x float> %v, i32 0
  %v.1 = extractelement <4 x float> %v, i32 0
  %v.2 = extractelement <4 x float> %v, i32 0
  %v.3 = extractelement <4 x float> %v, i32 0
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %u.0 = extractelement <4 x float> %u, i32 0
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %u
}

; --- sample.d with half-typed derivatives; expects image_sample_d_g16 ---

define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_d_1d_g16_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; Declarations of the intrinsics called above. The sample.d names carry two
; type suffixes after "nortn" (derivative operand type, then coordinate
; operand type), spelled exactly as at the call sites.
declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX10-GISEL: {{.*}}
; GFX10-SDAG: {{.*}}
; GFX11: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-GISEL-FAKE16: {{.*}}
; GFX11-SDAG: {{.*}}
; GFX11-SDAG-FAKE16: {{.*}}
; GFX11-SDAG-TRUE16: {{.*}}
; GFX12-GISEL-FAKE16: {{.*}}
; GFX12-SDAG-FAKE16: {{.*}}
; GFX12-SDAG-TRUE16: {{.*}}